# scanner37.py
#  Copyright (c) 2016-2019, 2021-2023 by Rocky Bernstein
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Python 3.7 bytecode decompiler scanner.

Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation.

This sets up the opcodes for Python 3.7 and calls a generalized
scanner routine for Python 3.
"""
from typing import Tuple

# ``opc`` is the xdis opcode module for Python 3.7; its JUMP_OPS attribute is
# re-exported below.
from xdis.opcodes import opcode_37 as opc

from uncompyle6.scanner import CONST_COLLECTIONS, Token
from uncompyle6.scanners.scanner37base import Scanner37Base

# Module-level alias of the 3.7 jump opcodes.
# bytecode verification, verify(), uses JUMP_OPs from here
JUMP_OPs = opc.JUMP_OPS
  29. class Scanner37(Scanner37Base):
  30. def __init__(self, show_asm=None, debug="", is_pypy=False):
  31. Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
  32. self.debug = debug
  33. return
  34. pass
  35. def bound_collection_from_tokens(
  36. self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
  37. ) -> list:
  38. count = t.attr
  39. assert isinstance(count, int)
  40. assert count <= i
  41. if collection_type == "CONST_DICT":
  42. # constant dictionaries work via BUILD_CONST_KEY_MAP and
  43. # handle the values() like sets and lists.
  44. # However, the keys() are an LOAD_CONST of the keys.
  45. # adjust offset to account for this
  46. count += 1
  47. # For small lists don't bother
  48. if count < 5:
  49. return next_tokens + [t]
  50. collection_start = i - count
  51. for j in range(collection_start, i):
  52. if tokens[j].kind not in (
  53. "LOAD_CODE",
  54. "LOAD_CONST",
  55. "LOAD_FAST",
  56. "LOAD_GLOBAL",
  57. "LOAD_NAME",
  58. "LOAD_STR",
  59. ):
  60. return next_tokens + [t]
  61. collection_enum = CONST_COLLECTIONS.index(collection_type)
  62. # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
  63. # add a boundary marker and change LOAD_CONST to something else.
  64. new_tokens = next_tokens[:-count]
  65. start_offset = tokens[collection_start].offset
  66. new_tokens.append(
  67. Token(
  68. opname="COLLECTION_START",
  69. attr=collection_enum,
  70. pattr=collection_type,
  71. offset=f"{start_offset}_0",
  72. linestart=False,
  73. has_arg=True,
  74. has_extended_arg=False,
  75. opc=self.opc,
  76. optype="pseudo",
  77. )
  78. )
  79. for j in range(collection_start, i):
  80. new_tokens.append(
  81. Token(
  82. opname="ADD_VALUE",
  83. attr=tokens[j].attr,
  84. pattr=tokens[j].pattr,
  85. offset=tokens[j].offset,
  86. linestart=tokens[j].linestart,
  87. has_arg=True,
  88. has_extended_arg=False,
  89. opc=self.opc,
  90. optype=tokens[j].optype,
  91. )
  92. )
  93. new_tokens.append(
  94. Token(
  95. opname=f"BUILD_{collection_type}",
  96. attr=t.attr,
  97. pattr=t.pattr,
  98. offset=t.offset,
  99. linestart=t.linestart,
  100. has_arg=t.has_arg,
  101. has_extended_arg=False,
  102. opc=t.opc,
  103. )
  104. )
  105. return new_tokens
  106. def ingest(
  107. self, bytecode, classname=None, code_objects={}, show_asm=None
  108. ) -> Tuple[list, dict]:
  109. """
  110. Create "tokens" the bytecode of an Python code object. Largely these
  111. are the opcode name, but in some cases that has been modified to make parsing
  112. easier.
  113. returning a list of uncompyle6 Token's.
  114. Some transformations are made to assist the deparsing grammar:
  115. - various types of LOAD_CONST's are categorized in terms of what they load
  116. - COME_FROM instructions are added to assist parsing control structures
  117. - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
  118. * BUILD_LIST, BUILD_SET
  119. * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
  120. - EXTENDED_ARGS instructions are removed
  121. Also, when we encounter certain tokens, we add them to a set which will cause custom
  122. grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
  123. cause specific rules for the specific number of arguments they take.
  124. """
  125. tokens, customize = Scanner37Base.ingest(
  126. self, bytecode, classname, code_objects, show_asm
  127. )
  128. new_tokens = []
  129. for i, t in enumerate(tokens):
  130. # things that smash new_tokens like BUILD_LIST have to come first.
  131. if t.op in (
  132. self.opc.BUILD_CONST_KEY_MAP,
  133. self.opc.BUILD_LIST,
  134. self.opc.BUILD_SET,
  135. ):
  136. collection_type = (
  137. "DICT"
  138. if t.kind.startswith("BUILD_CONST_KEY_MAP")
  139. else t.kind.split("_")[1]
  140. )
  141. new_tokens = self.bound_collection_from_tokens(
  142. tokens, new_tokens, t, i, f"CONST_{collection_type}"
  143. )
  144. continue
  145. # The lowest bit of flags indicates whether the
  146. # var-keyword argument is placed at the top of the stack
  147. if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
  148. t.kind = "CALL_FUNCTION_EX_KW"
  149. pass
  150. elif t.op == self.opc.BUILD_STRING:
  151. t.kind = "BUILD_STRING_%s" % t.attr
  152. elif t.op == self.opc.CALL_FUNCTION_KW:
  153. t.kind = "CALL_FUNCTION_KW_%s" % t.attr
  154. elif t.op == self.opc.FORMAT_VALUE:
  155. if t.attr & 0x4:
  156. t.kind = "FORMAT_VALUE_ATTR"
  157. pass
  158. elif t.op == self.opc.BUILD_MAP_UNPACK_WITH_CALL:
  159. t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
  160. elif not self.is_pypy and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
  161. t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
  162. new_tokens.append(t)
  163. return new_tokens, customize
  164. if __name__ == "__main__":
  165. from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
  166. if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
  167. import inspect
  168. co = inspect.currentframe().f_code # type: ignore
  169. tokens, customize = Scanner37().ingest(co)
  170. for t in tokens:
  171. print(t.format())
  172. pass
  173. else:
  174. print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")