scanner37.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. # Copyright (c) 2016-2019, 2021-2022, 2024 by Rocky Bernstein
  2. #
  3. # This program is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. """
  16. Python 3.7 bytecode decompiler scanner.
  17. Does some additional massaging of xdis-disassembled instructions to
  18. make things easier for decompilation.
  19. This sets up the opcodes for Python 3.7 and calls a generalized
  20. scanner routine for Python 3.
  21. """
  22. from typing import Tuple
  23. # bytecode verification, verify(), uses JUMP_OPs from here
  24. from xdis.opcodes import opcode_37 as opc
  25. from decompyle3.scanners.scanner37base import Scanner37Base
  26. # bytecode verification, verify(), uses JUMP_OPs from here
  27. JUMP_OPs = opc.JUMP_OPS
  28. class Scanner37(Scanner37Base):
  29. def __init__(self, show_asm=None, debug="", is_pypy=False):
  30. Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
  31. self.debug = debug
  32. return
  33. pass
  34. def ingest(
  35. self, bytecode, classname=None, code_objects={}, show_asm=None
  36. ) -> Tuple[list, dict]:
  37. """
  38. Create "tokens" the bytecode of an Python code object. Largely these
  39. are the opcode name, but in some cases that has been modified to make parsing
  40. easier.
  41. returning a list of uncompyle6 Token's.
  42. Some transformations are made to assist the deparsing grammar:
  43. - various types of LOAD_CONST's are categorized in terms of what they load
  44. - COME_FROM instructions are added to assist parsing control structures
  45. - operands with stack argument counts or flag masks are appended to the
  46. opcode name, e.g.:
  47. * BUILD_LIST, BUILD_SET
  48. * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
  49. arguments
  50. - EXTENDED_ARGS instructions are removed
  51. Also, when we encounter certain tokens, we add them to a set
  52. which will cause custom grammar rules. Specifically, variable
  53. arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
  54. for the specific number of arguments they take.
  55. """
  56. tokens, customize = Scanner37Base.ingest(
  57. self, bytecode, classname, code_objects, show_asm
  58. )
  59. new_tokens = []
  60. for i, t in enumerate(tokens):
  61. # things that smash new_tokens like BUILD_LIST have to come first.
  62. if t.op in (
  63. self.opc.BUILD_CONST_KEY_MAP,
  64. self.opc.BUILD_LIST,
  65. self.opc.BUILD_SET,
  66. ):
  67. collection_type = (
  68. "DICT"
  69. if t.kind.startswith("BUILD_CONST_KEY_MAP")
  70. else t.kind.split("_")[1]
  71. )
  72. new_tokens = self.bound_collection_from_tokens(
  73. tokens, new_tokens, t, i, f"CONST_{collection_type}"
  74. )
  75. continue
  76. # The lowest bit of flags indicates whether the
  77. # var-keyword argument is placed at the top of the stack
  78. if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
  79. t.kind = "CALL_FUNCTION_EX_KW"
  80. pass
  81. elif t.op == self.opc.BUILD_STRING:
  82. t.kind = "BUILD_STRING_%s" % t.attr
  83. elif t.op == self.opc.CALL_FUNCTION_KW:
  84. t.kind = "CALL_FUNCTION_KW_%s" % t.attr
  85. elif t.op == self.opc.FORMAT_VALUE:
  86. if t.attr & 0x4:
  87. t.kind = "FORMAT_VALUE_ATTR"
  88. pass
  89. elif t.op == self.opc.BUILD_MAP_UNPACK_WITH_CALL:
  90. t.kind = "BUILD_MAP_UNPACK_WITH_CALL_%d" % t.attr
  91. elif (not self.is_pypy) and t.op == self.opc.BUILD_TUPLE_UNPACK_WITH_CALL:
  92. t.kind = "BUILD_TUPLE_UNPACK_WITH_CALL_%d" % t.attr
  93. new_tokens.append(t)
  94. return new_tokens, customize
  95. if __name__ == "__main__":
  96. from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
  97. if PYTHON_VERSION_TRIPLE[:2] == (3, 7):
  98. import inspect
  99. co = inspect.currentframe().f_code # type: ignore
  100. tokens, customize = Scanner37().ingest(co)
  101. for t in tokens:
  102. print(t.format())
  103. pass
  104. else:
  105. print(f"Need to be Python 3.7 to demo; I am version {version_tuple_to_str()}.")