code_fns.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. # Copyright (c) 2015-2016, 2018-2024 by Rocky Bernstein
  2. # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
  3. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  4. # Copyright (c) 1999 John Aycock
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. """
  19. CPython magic- and version- independent disassembly routines
  20. There are two reasons we can't use Python's built-in routines
  21. from dis. First, the bytecode we are extracting may be from a different
  22. version of Python (different magic number) than the version of Python
  23. that is doing the extraction.
  24. Second, we need structured instruction information for the
  25. (de)-parsing step. Python 3.4 and up provides this, but we still do
  26. want to run on earlier Python versions.
  27. """
  28. import sys
  29. from collections import deque
  30. from py_compile import PyCompileError
  31. from typing import Optional
  32. from xdis import check_object_path, iscode, load_module
  33. from decompyle3.scanner import get_scanner
  34. from decompyle3.semantics.pysource import (
  35. PARSER_DEFAULT_DEBUG,
  36. TREE_DEFAULT_DEBUG,
  37. code_deparse,
  38. )
  39. def disco_deparse(
  40. version: Optional[tuple],
  41. co,
  42. codename_map: dict,
  43. out,
  44. is_pypy,
  45. debug_opts,
  46. start_offset: int = 0,
  47. stop_offset: int = -1,
  48. ) -> None:
  49. """
  50. diassembles and deparses a given code block 'co'
  51. """
  52. assert iscode(co)
  53. # store final output stream for case of error
  54. real_out = out or sys.stdout
  55. print(f"# Python {version}", file=real_out)
  56. if co.co_filename:
  57. print(f"# Embedded file name: {co.co_filename}", file=real_out)
  58. scanner = get_scanner(version, is_pypy=is_pypy)
  59. queue = deque([co])
  60. disco_deparse_loop(
  61. version,
  62. scanner.ingest,
  63. codename_map,
  64. queue,
  65. real_out,
  66. is_pypy,
  67. debug_opts,
  68. start_offset=start_offset,
  69. stop_offset=stop_offset,
  70. )
  71. def disco_deparse_loop(
  72. version: Optional[tuple],
  73. disasm,
  74. codename_map: dict,
  75. queue,
  76. real_out,
  77. is_pypy,
  78. debug_opts,
  79. start_offset: int = 0,
  80. stop_offset: int = -1,
  81. ):
  82. while len(queue) > 0:
  83. co = queue.popleft()
  84. skip_token_scan = False
  85. if co.co_name in codename_map:
  86. print(
  87. "\n# %s line %d of %s"
  88. % (co.co_name, co.co_firstlineno, co.co_filename),
  89. file=real_out,
  90. )
  91. code_deparse(
  92. co,
  93. real_out,
  94. version=version,
  95. debug_opts=debug_opts,
  96. is_pypy=is_pypy,
  97. compile_mode=codename_map[co.co_name],
  98. start_offset=start_offset,
  99. stop_offset=stop_offset,
  100. )
  101. skip_token_scan = True
  102. tokens, _ = disasm(co, show_asm=debug_opts.get("asm", None))
  103. if skip_token_scan:
  104. continue
  105. for t in tokens:
  106. if iscode(t.pattr):
  107. queue.append(t.pattr)
  108. elif iscode(t.attr):
  109. queue.append(t.attr)
  110. pass
  111. pass
  112. def decompile_code_type(
  113. filename: str,
  114. codename_map: dict,
  115. outstream=None,
  116. showasm=None,
  117. showast=TREE_DEFAULT_DEBUG,
  118. showgrammar=PARSER_DEFAULT_DEBUG,
  119. start_offset=0,
  120. stop_offset=-1,
  121. ) -> bool:
  122. """
  123. decompile all lambda functions in a python byte-code file (.pyc)
  124. If given a Python source file (".py") file, we'll
  125. decompile all lambdas of the corresponding compiled object.
  126. """
  127. try:
  128. filename = check_object_path(filename)
  129. except (PyCompileError, ValueError) as e:
  130. print(f"Skipping {filename}:\n{e}")
  131. return False
  132. (version, _, _, co, is_pypy, _, _) = load_module(filename)
  133. # maybe a second -a will do before as well
  134. # asm = "after" if showasm else None
  135. debug_opts = {"asm": showasm, "tree": showast, "grammar": showgrammar}
  136. if isinstance(co, list):
  137. for bytecode in co:
  138. disco_deparse(
  139. version,
  140. bytecode,
  141. codename_map,
  142. outstream,
  143. is_pypy,
  144. debug_opts,
  145. start_offset=start_offset,
  146. stop_offset=stop_offset,
  147. )
  148. else:
  149. disco_deparse(
  150. version,
  151. co,
  152. codename_map,
  153. outstream,
  154. is_pypy,
  155. debug_opts,
  156. start_offset=start_offset,
  157. stop_offset=stop_offset,
  158. )
  159. return True
  160. def decompile_dict_comprehensions(
  161. filename: str,
  162. outstream=None,
  163. showasm=None,
  164. showast=TREE_DEFAULT_DEBUG,
  165. showgrammar=PARSER_DEFAULT_DEBUG,
  166. start_offset: int = 0,
  167. stop_offset: int = -1,
  168. ) -> Optional[bool]:
  169. """
  170. decompile all the dictionary-comprehension functions in a python byte-code
  171. file (.pyc)
  172. If given a Python source file (".py") file, we'll
  173. decompile all dict_comprehensions of the corresponding compiled object.
  174. """
  175. return decompile_code_type(
  176. filename,
  177. {"<dictcomp>": "dictcomp"},
  178. outstream,
  179. showasm,
  180. showast,
  181. showgrammar,
  182. start_offset,
  183. stop_offset,
  184. )
  185. def decompile_all_fragments(
  186. filename: str,
  187. outstream=None,
  188. showasm=None,
  189. showast=TREE_DEFAULT_DEBUG,
  190. showgrammar=PARSER_DEFAULT_DEBUG,
  191. start_offset: int = 0,
  192. stop_offset: int = -1,
  193. ) -> Optional[bool]:
  194. """
  195. decompile all comprehensions, generators, and lambda in a python byte-code
  196. file (.pyc)
  197. If given a Python source file (".py") file, we'll
  198. decompile all dict_comprehensions of the corresponding compiled object.
  199. """
  200. return decompile_code_type(
  201. filename,
  202. {
  203. "<dictcomp>": "dictcomp",
  204. "<genexpr>": "genexpr",
  205. "<lambda>": "lambda",
  206. "<listcomp>": "listcomp",
  207. "<setcomp>": "setcomp",
  208. },
  209. outstream,
  210. showasm,
  211. showast,
  212. showgrammar,
  213. start_offset=start_offset,
  214. stop_offset=stop_offset,
  215. )
  216. def decompile_generators(
  217. filename: str,
  218. outstream=None,
  219. showasm=None,
  220. showast=TREE_DEFAULT_DEBUG,
  221. showgrammar=PARSER_DEFAULT_DEBUG,
  222. start_offset: int = 0,
  223. stop_offset: int = -1,
  224. ) -> Optional[bool]:
  225. """
  226. decompile all the generator functions in a python byte-code file (.pyc)
  227. If given a Python source file (".py") file, we'll
  228. decompile all dict_comprehensions of the corresponding compiled object.
  229. """
  230. return decompile_code_type(
  231. filename,
  232. {"<genexpr>": "genexpr"},
  233. outstream,
  234. showasm,
  235. showast,
  236. showgrammar,
  237. start_offset,
  238. stop_offset,
  239. )
  240. def decompile_lambda_fns(
  241. filename: str,
  242. outstream=None,
  243. showasm=None,
  244. showast=TREE_DEFAULT_DEBUG,
  245. showgrammar=PARSER_DEFAULT_DEBUG,
  246. start_offset: int = 0,
  247. stop_offset: int = -1,
  248. ) -> Optional[bool]:
  249. """
  250. decompile all the lambda functions in a python byte-code file (.pyc)
  251. If given a Python source file (".py") file, we'll
  252. decompile all lambdas of the corresponding compiled object.
  253. """
  254. return decompile_code_type(
  255. filename,
  256. {"<lambda>": "lambda"},
  257. outstream,
  258. showasm,
  259. showast,
  260. showgrammar,
  261. start_offset=start_offset,
  262. stop_offset=stop_offset,
  263. )
  264. def decompile_list_comprehensions(
  265. filename: str,
  266. outstream=None,
  267. showasm=None,
  268. showast=TREE_DEFAULT_DEBUG,
  269. showgrammar=PARSER_DEFAULT_DEBUG,
  270. start_offset: int = 0,
  271. stop_offset: int = -1,
  272. ) -> Optional[bool]:
  273. """
  274. decompile all of the lambda functions in a python byte-code file (.pyc)
  275. If given a Python source file (".py") file, we'll
  276. decompile all list_comprehensions of the corresponding compiled object.
  277. """
  278. return decompile_code_type(
  279. filename,
  280. {"<listcomp>": "listcomp"},
  281. outstream,
  282. showasm,
  283. showast,
  284. showgrammar,
  285. start_offset=start_offset,
  286. stop_offset=stop_offset,
  287. )
  288. def decompile_set_comprehensions(
  289. filename: str,
  290. code_type,
  291. outstream=None,
  292. showasm=None,
  293. showast=TREE_DEFAULT_DEBUG,
  294. showgrammar=PARSER_DEFAULT_DEBUG,
  295. start_offset: int = 0,
  296. stop_offset: int = -1,
  297. ) -> Optional[bool]:
  298. """
  299. decompile all lambda functions in a python byte-code file (.pyc)
  300. If given a Python source file (".py") file, we'll
  301. decompile all list_comprehensions of the corresponding compiled object.
  302. """
  303. return decompile_code_type(
  304. filename,
  305. {"<setcomp>": "setcomp"},
  306. outstream,
  307. showasm,
  308. showast,
  309. showgrammar,
  310. start_offset=start_offset,
  311. stop_offset=stop_offset,
  312. )
  313. def _test() -> None:
  314. """Simple test program to disassemble a file."""
  315. argc = len(sys.argv)
  316. if argc != 2:
  317. if argc == 1:
  318. fn = __file__
  319. else:
  320. sys.stderr.write("usage: %s [-|CPython compiled file]\n" % __file__)
  321. sys.exit(2)
  322. else:
  323. fn = sys.argv[1]
  324. decompile_all_fragments(fn)
# Run the demo driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()