scanner38-next.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. # Copyright (c) 2019-2022, 2024 by Rocky Bernstein
  2. #
  3. # This program is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. """
  16. Python 3.8 bytecode decompiler scanner.
  17. Does some additional massaging of xdis-disassembled instructions to
  18. make things easier for decompilation.
  19. This sets up the opcodes for Python 3.8 and calls a generalized
  20. scanner routine for Python 3.7 and up.
  21. """
  22. from typing import Dict, List, Tuple
  23. # bytecode verification, verify(), uses JUMP_OPs from here
  24. from xdis.opcodes import opcode_38 as opc
  25. from decompyle3.scanners.scanner37 import Scanner37
  26. from decompyle3.scanners.scanner37base import Scanner37Base
  27. from decompyle3.scanners.tok import off2int
# Module-level alias of JUMP_OPS from xdis's opcode_38 module.
# Bytecode verification, verify(), uses JUMP_OPs from here.
JUMP_OPs = opc.JUMP_OPS
  30. class Scanner38(Scanner37):
  31. def __init__(self, show_asm=None, debug="", is_pypy=False):
  32. Scanner37Base.__init__(self, (3, 8), show_asm, debug, is_pypy)
  33. self.debug = debug
  34. return
  35. pass
  36. def ingest(
  37. self, bytecode, classname=None, code_objects={}, show_asm=None
  38. ) -> Tuple[list, dict]:
  39. """
  40. Create "tokens" the bytecode of an Python code object. Largely these
  41. are the opcode name, but in some cases that has been modified to make parsing
  42. easier.
  43. returning a list of decompyle3 Token's.
  44. Some transformations are made to assist the deparsing grammar:
  45. - various types of LOAD_CONST's are categorized in terms of what they load
  46. - COME_FROM instructions are added to assist parsing control structures
  47. - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
  48. * BUILD_LIST, BUILD_SET
  49. * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
  50. - EXTENDED_ARGS instructions are removed
  51. Also, when we encounter certain tokens, we add them to a set which will cause custom
  52. grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
  53. cause specific rules for the specific number of arguments they take.
  54. """
  55. tokens, customize = super(Scanner38, self).ingest(
  56. bytecode, classname, code_objects, show_asm
  57. )
  58. # Hacky way to detect loop ranges. The key in
  59. # jump_back_targets is the start of the loop. The value is
  60. # where the loop ends. In current Python, to an earlier offset
  61. # are always to loops. And blocks are ordered so that the
  62. # JUMP_LOOP with the highest offset will be where the range
  63. # ends.
  64. jump_back_targets: Dict[int, int] = {}
  65. for token in tokens:
  66. if token.kind == "JUMP_LOOP":
  67. jump_back_targets[token.attr] = token.offset
  68. pass
  69. pass
  70. if self.debug and jump_back_targets:
  71. print(jump_back_targets)
  72. loop_ends: List[int] = []
  73. next_end = tokens[len(tokens) - 1].off2int() + 10
  74. new_tokens = []
  75. for token in tokens:
  76. opname = token.kind
  77. offset = token.offset
  78. if token.off2int(prefer_last=False) == next_end:
  79. loop_ends.pop()
  80. if self.debug:
  81. print(f"{' ' * len(loop_ends)}remove loop offset {offset}")
  82. pass
  83. next_end = (
  84. loop_ends[-1]
  85. if len(loop_ends)
  86. else tokens[len(tokens) - 1].off2int() + 10
  87. )
  88. # things that smash new_tokens like BUILD_LIST have to come first.
  89. if offset in jump_back_targets:
  90. next_end = off2int(jump_back_targets[offset], prefer_last=False)
  91. if self.debug:
  92. print(
  93. f"{' ' * len(loop_ends)}adding loop offset {offset} ending "
  94. f"at {next_end}"
  95. )
  96. loop_ends.append(next_end)
  97. # Turn JUMP opcodes into "BREAK_LOOP" opcodes.
  98. # FIXME!!!!: this should be replaced by proper control flow.
  99. if opname in ("JUMP_FORWARD", "JUMP_ABSOLUTE") and len(loop_ends):
  100. jump_target = token.attr
  101. if jump_target > loop_ends[-1]:
  102. token.kind = "BREAK_LOOP"
  103. else:
  104. if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
  105. # Not a forward-enough jump to break out of the
  106. # next loop, so continue. FIXME: Do we need
  107. # "continue" detection?
  108. new_tokens.append(token)
  109. continue
  110. j = i
  111. while tokens[j-1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
  112. j -= 1
  113. if tokens[j].linestart:
  114. break
  115. token_with_linestart = tokens[j]
  116. if token_with_linestart.linestart:
  117. token.kind = "BREAK_LOOP"
  118. pass
  119. pass
  120. new_tokens.append(token)
  121. return new_tokens, customize
  122. if __name__ == "__main__":
  123. from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
  124. if PYTHON_VERSION_TRIPLE[:2] == (3, 8):
  125. import inspect
  126. co = inspect.currentframe().f_code # type: ignore
  127. tokens, customize = Scanner38().ingest(co)
  128. for t in tokens:
  129. print(t.format())
  130. pass
  131. else:
  132. print(f"Need to be Python 3.8 to demo; I am version {version_tuple_to_str()}.")