tok.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. # Copyright (c) 2016-2021, 2023-2025 by Rocky Bernstein
  2. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  3. # Copyright (c) 1999 John Aycock
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. import re
  18. import sys
  19. from typing import Optional, Union
  20. intern = sys.intern
  21. def off2int(offset, prefer_last=True):
  22. if isinstance(offset, int):
  23. return offset
  24. else:
  25. assert isinstance(offset, str)
  26. offsets = list(map(int, offset.split("_")))
  27. if len(offsets) == 1:
  28. return offsets[0]
  29. else:
  30. assert len(offsets) == 2
  31. offset_1, offset_2 = offsets
  32. if offset_1 + 2 == offset_2:
  33. # This is an instruction with an extended arg.
  34. # For things that compare against offsets, we generally want the
  35. # later offset.
  36. return offset_2 if prefer_last else offset_1
  37. else:
  38. # Probably a "COME_FROM"-type offset, where the second number
  39. # is just a count, and not really an offset.
  40. return offset_1
  41. class Token:
  42. """
  43. Class representing a byte-code instruction.
  44. A byte-code token is equivalent to Python 3's dis.instruction or
  45. the contents of one line as output by dis.dis().
  46. """
  47. # FIXME: match Python 3.4's terms:
  48. # linestart = starts_line
  49. # attr = argval
  50. # pattr = argrepr
  51. def __init__(
  52. self,
  53. opname,
  54. attr=None,
  55. pattr=None,
  56. offset: Union[int, str] = -1,
  57. linestart=None,
  58. op=None,
  59. has_arg=None,
  60. opc=None,
  61. has_extended_arg=False,
  62. optype=None,
  63. ):
  64. self.kind = intern(opname)
  65. self.has_arg = has_arg
  66. self.attr: Optional[int] = attr
  67. self.pattr = pattr
  68. self.optype = optype
  69. if has_extended_arg:
  70. self.offset = "%d_%d" % (offset, offset + 2)
  71. else:
  72. self.offset = offset
  73. self.linestart = linestart
  74. if has_arg is False:
  75. self.attr = None
  76. self.pattr = None
  77. if opc is None:
  78. try:
  79. from xdis.std import _std_api
  80. except KeyError as e:
  81. print(f"I don't know about Python version {e} yet.")
  82. try:
  83. version_tuple = tuple(int(i) for i in str(e)[1:-1].split("."))
  84. except Exception:
  85. pass
  86. else:
  87. if version_tuple > (3, 9):
  88. print("Python versions 3.9 and greater are not supported.")
  89. else:
  90. print(f"xdis might need to be informed about version {e}")
  91. return
  92. self.opc = _std_api.opc
  93. else:
  94. self.opc = opc
  95. if op is None:
  96. self.op = self.opc.opmap.get(self.kind, None)
  97. else:
  98. self.op = op
  99. def __eq__(self, o):
  100. """'==' on kind and "pattr" attributes.
  101. It is okay if offsets and linestarts are different"""
  102. if isinstance(o, Token):
  103. return (self.kind == o.kind) and (
  104. (self.pattr == o.pattr) or self.attr == o.attr
  105. )
  106. else:
  107. # ?? do we need this?
  108. return self.kind == o
  109. def __ne__(self, o):
  110. """'!=', but it's okay if offsets and linestarts are different"""
  111. return not self.__eq__(o)
  112. def __repr__(self):
  113. return str(self.kind)
  114. # def __str__(self):
  115. # pattr = self.pattr if self.pattr is not None else ''
  116. # prefix = '\n%3d ' % self.linestart if self.linestart else (' ' * 6)
  117. # return (prefix +
  118. # ('%9s %-18s %r' % (self.offset, self.kind, pattr)))
  119. def __str__(self):
  120. return self.format(line_prefix="")
  121. def format(self, line_prefix="", token_num=None):
  122. if token_num is not None:
  123. prefix = (
  124. "\n(%03d)%s L.%4d " % (token_num, line_prefix, self.linestart)
  125. if self.linestart
  126. else ("(%03d)%s" % (token_num, " " * (9 + len(line_prefix))))
  127. )
  128. else:
  129. prefix = (
  130. "\n%s L.%4d " % (line_prefix, self.linestart)
  131. if self.linestart
  132. else (" " * (9 + len(line_prefix)))
  133. )
  134. offset_opname = "%8s %-17s" % (self.offset, self.kind)
  135. if not self.has_arg:
  136. return "%s%s" % (prefix, offset_opname)
  137. argstr = "%6d " % self.attr if isinstance(self.attr, int) else (" " * 7)
  138. name = self.kind
  139. if self.has_arg:
  140. pattr = self.pattr
  141. if self.opc:
  142. if self.op in self.opc.JREL_OPS:
  143. if not self.pattr.startswith("to "):
  144. pattr = "to " + self.pattr
  145. elif self.op in self.opc.JABS_OPS:
  146. self.pattr = str(self.pattr)
  147. if not self.pattr.startswith("to "):
  148. pattr = "to " + str(self.pattr)
  149. pass
  150. elif self.op in self.opc.CONST_OPS:
  151. if name == "LOAD_STR":
  152. pattr = self.attr
  153. elif name == "LOAD_CODE":
  154. return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
  155. else:
  156. return "%s%s %r" % (prefix, offset_opname, pattr)
  157. elif self.op in self.opc.hascompare:
  158. if isinstance(self.attr, int):
  159. pattr = self.opc.cmp_op[self.attr]
  160. return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
  161. elif self.op in self.opc.hasvargs:
  162. return "%s%s%s" % (prefix, offset_opname, argstr)
  163. elif name == "LOAD_ASSERT":
  164. return "%s%s %s" % (prefix, offset_opname, pattr)
  165. elif self.op in self.opc.NAME_OPS:
  166. if self.opc.version_tuple >= (3, 0):
  167. return "%s%s%s %s" % (prefix, offset_opname, argstr, self.attr)
  168. elif name == "EXTENDED_ARG":
  169. return "%s%s%s 0x%x << %s = %s" % (
  170. prefix,
  171. offset_opname,
  172. argstr,
  173. self.attr,
  174. self.opc.EXTENDED_ARG_SHIFT,
  175. pattr,
  176. )
  177. # And so on. See xdis/bytecode.py get_instructions_bytes
  178. pass
  179. elif re.search(r"_\d+$", self.kind):
  180. return "%s%s%s" % (prefix, offset_opname, argstr)
  181. else:
  182. pattr = ""
  183. return "%s%s%s %r" % (prefix, offset_opname, argstr, pattr)
  184. def __hash__(self):
  185. return hash(self.kind)
  186. def __getitem__(self, i):
  187. raise IndexError
  188. def off2int(self, prefer_last=True):
  189. return off2int(self.offset, prefer_last)
  190. NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None)