tok.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. # Copyright (c) 2016-2020, 2023-2024 by Rocky Bernstein
  2. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  3. # Copyright (c) 1999 John Aycock
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. import re
  18. import sys
  19. from typing import Optional, Union
  20. def off2int(offset: int, prefer_last=True) -> int:
  21. if isinstance(offset, int):
  22. return offset
  23. else:
  24. assert isinstance(offset, str)
  25. offsets = list(map(int, offset.split("_")))
  26. if len(offsets) == 1:
  27. return offsets[0]
  28. else:
  29. assert 2 <= len(offsets) <= 3
  30. if len(offsets) == 3:
  31. offsets = offsets[:-1]
  32. assert len(offsets) == 2
  33. offset_1, offset_2 = offsets
  34. if offset_1 + 2 == offset_2:
  35. # This is an instruction with an extended arg.
  36. # For things that compare against offsets, we generally want the
  37. # later offset.
  38. return offset_2 if prefer_last else offset_1
  39. else:
  40. # Probably a "COME_FROM"-type offset, where the second number
  41. # is just a count, and not really an offset.
  42. return offset_1
  43. class Token:
  44. """
  45. Class representing a byte-code instruction.
  46. A byte-code token is equivalent to Python 3's dis.instruction or
  47. the contents of one line as output by dis.dis().
  48. """
  49. # FIXME: match Python 3.4's terms:
  50. # linestart = starts_line
  51. # attr = argval
  52. # pattr = argrepr
  53. def __init__(
  54. self,
  55. opname: str,
  56. attr=None,
  57. pattr=None,
  58. offset: Union[int, str] = -1,
  59. linestart=None,
  60. op=None,
  61. has_arg=None,
  62. opc=None,
  63. # extended arg indicates that this token was preceded
  64. # by EXTENDED_ARG. Note that the offset passed
  65. # is the EXTENDED_ARG's offset even though
  66. # the instruction associated with opname sits
  67. # at next offset
  68. has_extended_arg=False,
  69. tos_str=None,
  70. start_offset=None,
  71. optype: Optional[str] = None,
  72. ):
  73. self.attr = attr
  74. self.has_arg = has_arg
  75. self.kind = sys.intern(opname)
  76. self.linestart = linestart
  77. self.offset = f"{offset}_{offset+2}" if has_extended_arg else offset
  78. self.optype = optype
  79. self.pattr = pattr
  80. self.start_offset = start_offset
  81. self.tos_str = tos_str
  82. if has_arg is False:
  83. self.attr = None
  84. self.pattr = None
  85. if opc is None:
  86. try:
  87. from xdis.std import _std_api
  88. except KeyError as e:
  89. print(f"I don't know about Python version {e} yet.")
  90. try:
  91. version_tuple = tuple(int(i) for i in str(e)[1:-1].split("."))
  92. except Exception:
  93. pass
  94. else:
  95. if version_tuple > (3, 9):
  96. print("Python versions 3.9 and greater are not supported.")
  97. else:
  98. print(f"xdis might need to be informed about version {e}")
  99. return
  100. self.opc = _std_api.opc
  101. else:
  102. self.opc = opc
  103. if op is None:
  104. self.op = self.opc.opmap.get(self.kind, None)
  105. else:
  106. self.op = op
  107. def __eq__(self, o) -> bool:
  108. """'==' on kind and "pattr" attributes.
  109. It is okay if offsets and linestarts are different"""
  110. if isinstance(o, Token):
  111. return (self.kind == o.kind) and (
  112. (self.pattr == o.pattr) or self.attr == o.attr
  113. )
  114. else:
  115. # ?? do we need this?
  116. return self.kind == o
  117. def __ne__(self, o) -> bool:
  118. """'!=', but it's okay if offsets and linestarts are different"""
  119. return not self.__eq__(o)
  120. def __repr__(self) -> str:
  121. return str(self.kind)
  122. # def __str__(self):
  123. # pattr = self.pattr if self.pattr is not None else ''
  124. # prefix = '\n%3d ' % self.linestart if self.linestart else (' ' * 6)
  125. # return (prefix +
  126. # ('%9s %-18s %r' % (self.offset, self.kind, pattr)))
  127. def __str__(self) -> str:
  128. return self.format(line_prefix="")
  129. def format(self, line_prefix="", token_num=None) -> str:
  130. if token_num is not None:
  131. prefix = (
  132. "\n(%03d)%s L.%4d " % (token_num, line_prefix, self.linestart)
  133. if self.linestart
  134. else ("(%03d)%s" % (token_num, " " * (9 + len(line_prefix))))
  135. )
  136. else:
  137. prefix = (
  138. "\n%s L.%4d " % (line_prefix, self.linestart)
  139. if self.linestart
  140. else (" " * (9 + len(line_prefix)))
  141. )
  142. offset_opname = "%8s %-17s" % (self.offset, self.kind)
  143. if not self.has_arg:
  144. return "%s%s" % (prefix, offset_opname)
  145. argstr = "%6d " % self.attr if isinstance(self.attr, int) else (" " * 7)
  146. name = self.kind
  147. if self.has_arg:
  148. pattr = self.tos_str if self.tos_str is not None else self.pattr
  149. if self.opc:
  150. if self.op in self.opc.JREL_OPS:
  151. if not self.pattr.startswith("to "):
  152. pattr = "to " + self.pattr
  153. elif self.op in self.opc.JABS_OPS:
  154. self.pattr = str(self.pattr)
  155. if not self.pattr.startswith("to "):
  156. pattr = "to " + str(self.pattr)
  157. pass
  158. elif self.op in self.opc.CONST_OPS:
  159. if name == "LOAD_STR":
  160. pattr = self.attr
  161. elif name == "LOAD_CODE":
  162. return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
  163. else:
  164. return "%s%s %r" % (prefix, offset_opname, pattr)
  165. elif self.op in self.opc.hascompare:
  166. if isinstance(self.attr, int):
  167. pattr = self.opc.cmp_op[self.attr]
  168. return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
  169. elif self.op in self.opc.hasvargs:
  170. return "%s%s%s" % (prefix, offset_opname, argstr)
  171. elif name == "LOAD_ASSERT":
  172. return "%s%s %s" % (prefix, offset_opname, pattr)
  173. elif self.op in self.opc.NAME_OPS:
  174. return "%s%s%s %s" % (prefix, offset_opname, argstr, pattr)
  175. elif name == "EXTENDED_ARG":
  176. return "%s%s%s 0x%x << %s = %s" % (
  177. prefix,
  178. offset_opname,
  179. argstr,
  180. self.attr,
  181. self.opc.EXTENDED_ARG_SHIFT,
  182. pattr,
  183. )
  184. # And so on. See xdis/bytecode.py get_instructions_bytes
  185. pass
  186. elif re.search(r"_\d+$", self.kind):
  187. return "%s%s%s" % (prefix, offset_opname, argstr)
  188. else:
  189. pattr = ""
  190. return "%s%s%s %r" % (prefix, offset_opname, argstr, pattr)
  191. def __hash__(self):
  192. return hash(self.kind)
  193. def __getitem__(self, i: int):
  194. raise IndexError
  195. def off2int(self, prefer_last=True) -> int:
  196. """
  197. Return an offset for this token. Note that the
  198. token type can sometimes be a string when the token
  199. encompasses one or more EXTENDED_ARG instructions.
  200. """
  201. return off2int(self.offset, prefer_last)
  202. NoneToken = Token("LOAD_CONST", offset=-1, attr=None, pattr=None)