disasm.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. # Copyright (c) 2016-2018, 2020-2021, 2023-2025
  2. # by Rocky Bernstein
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # of the License, or (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17. """
  18. CPython version-independent disassembly routines
  19. """
  20. # Note: we tend to eschew new Python 3 things, and even future
  21. # imports so this can run on older Pythons. This is
  22. # intended to be a more cross-version Python program
  23. import datetime
  24. import dis
  25. import os
  26. import re
  27. import sys
  28. import types
  29. from collections import deque
  30. from typing import Tuple
  31. import xdis
  32. from xdis.bytecode import Bytecode
  33. from xdis.codetype import codeType2Portable
  34. from xdis.codetype.base import iscode
  35. from xdis.cross_dis import format_code_info, format_exception_table
  36. from xdis.load import check_object_path, load_module
  37. from xdis.magics import GRAAL3_MAGICS, PYTHON_MAGIC_INT
  38. from xdis.op_imports import op_imports, remap_opcodes
  39. from xdis.version import __version__
  40. from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
  41. def get_opcode(version_tuple, is_pypy, alternate_opmap=None):
  42. # Set up disassembler with the right opcodes
  43. lookup = ".".join((str(i) for i in version_tuple))
  44. if is_pypy:
  45. lookup += "pypy"
  46. if lookup in op_imports.keys():
  47. if alternate_opmap is not None:
  48. # TODO: change bytecode version number comment line to indicate altered
  49. return remap_opcodes(op_imports[lookup], alternate_opmap)
  50. return op_imports[lookup]
  51. if is_pypy:
  52. pypy_str = " for pypy"
  53. else:
  54. pypy_str = ""
  55. raise TypeError(f"{lookup} is not a Python version{pypy_str} I know about")
  56. def show_module_header(
  57. version_tuple,
  58. co,
  59. timestamp,
  60. out=sys.stdout,
  61. is_pypy=False,
  62. magic_int=None,
  63. source_size=None,
  64. sip_hash=None,
  65. header=True,
  66. show_filename=True,
  67. is_graal=False,
  68. ) -> None:
  69. bytecode_version = ".".join((str(i) for i in version_tuple))
  70. real_out = out or sys.stdout
  71. if is_pypy:
  72. co_pypy_str = "PyPy "
  73. elif is_graal:
  74. co_pypy_str = "Graal "
  75. else:
  76. co_pypy_str = ""
  77. if IS_PYPY:
  78. run_pypy_str = "PyPy "
  79. else:
  80. run_pypy_str = ""
  81. if header:
  82. magic_str = ""
  83. if magic_int:
  84. magic_str = str(magic_int)
  85. real_out.write(
  86. (
  87. "# pydisasm version %s\n# %sPython bytecode %s%s"
  88. "\n# Disassembled from %sPython %s\n"
  89. )
  90. % (
  91. __version__,
  92. co_pypy_str,
  93. bytecode_version,
  94. " (%s)" % magic_str,
  95. run_pypy_str,
  96. "\n# ".join(sys.version.split("\n")),
  97. )
  98. )
  99. if PYTHON_VERSION_TRIPLE < (3, 0) and bytecode_version >= "3.0":
  100. real_out.write(
  101. "\n## **Warning** bytecode strings will be converted to strings.\n"
  102. )
  103. real_out.write("## To avoid loss, run this from Python 3.0 or greater\n\n")
  104. if timestamp is not None:
  105. value = datetime.datetime.fromtimestamp(timestamp)
  106. real_out.write("# Timestamp in code: %d" % timestamp)
  107. real_out.write(value.strftime(" (%Y-%m-%d %H:%M:%S)\n"))
  108. if source_size is not None:
  109. real_out.write("# Source code size mod 2**32: %d bytes\n" % source_size)
  110. if sip_hash is not None:
  111. real_out.write("# SipHash: 0x%x\n" % sip_hash)
  112. if show_filename:
  113. real_out.write("# Embedded file name: %s\n" % co.co_filename)
  114. def disco(
  115. version_tuple,
  116. co,
  117. timestamp,
  118. out=sys.stdout,
  119. is_pypy: bool=False,
  120. magic_int=None,
  121. source_size=None,
  122. sip_hash=None,
  123. asm_format: str="classic",
  124. alternate_opmap=None,
  125. show_source: bool=False,
  126. is_graal: bool=False,
  127. methods=tuple(),
  128. ) -> None:
  129. """
  130. disassembles and deparses a given code block 'co'
  131. """
  132. assert iscode(co)
  133. show_module_header(
  134. version_tuple,
  135. co,
  136. timestamp,
  137. out,
  138. is_pypy,
  139. magic_int,
  140. source_size,
  141. sip_hash,
  142. header=True,
  143. show_filename=False,
  144. is_graal=is_graal,
  145. )
  146. # Store final output stream when there is an error.
  147. real_out = out or sys.stdout
  148. if co.co_filename and asm_format != "xasm":
  149. if not_filtered(co, methods):
  150. real_out.write(format_code_info(co, version_tuple, is_graal=is_graal) + "\n")
  151. pass
  152. opc = get_opcode(version_tuple, is_pypy, alternate_opmap)
  153. if is_graal:
  154. real_out.write("# We can't decode Graal bytecode\n")
  155. return
  156. if asm_format == "xasm":
  157. disco_loop_asm_format(opc, version_tuple, co, real_out, {}, set([]))
  158. else:
  159. queue = deque([co])
  160. disco_loop(
  161. opc,
  162. version_tuple,
  163. queue,
  164. real_out,
  165. asm_format=asm_format,
  166. dup_lines=True,
  167. show_source=show_source,
  168. methods=methods,
  169. )
  170. def disco_loop(
  171. opc,
  172. version_tuple,
  173. queue,
  174. real_out,
  175. dup_lines=False,
  176. asm_format="classic",
  177. show_source=False,
  178. methods=tuple(),
  179. ) -> None:
  180. """Disassembles a queue of code objects. If we discover
  181. another code object which will be found in co_consts, we add
  182. the new code to the list. Note that the order of code discovery
  183. is in the order of first encountered that is not amenable for
  184. the format used by a disassembler where code objects should
  185. be defined before using them in other functions.
  186. However, this is not recursive and will overall lead to less
  187. memory consumption at run time.
  188. """
  189. while len(queue) > 0:
  190. co = queue.popleft()
  191. if not_filtered(co, methods):
  192. if co.co_name not in ("<module>", "?"):
  193. real_out.write("\n" + format_code_info(co, version_tuple) + "\n")
  194. if asm_format == "dis":
  195. assert version_tuple[:2] == PYTHON_VERSION_TRIPLE[:2], (
  196. "dis requires disassembly from the same Python version: "
  197. f"Bytecode is for {version_tuple[:2]}; Running:{PYTHON_VERSION_TRIPLE[:2]}"
  198. )
  199. dis.disassemble(co, lasti=-1, file=real_out)
  200. else:
  201. bytecode = Bytecode(co, opc, dup_lines=dup_lines)
  202. real_out.write(
  203. bytecode.dis(asm_format=asm_format, show_source=show_source) + "\n"
  204. )
  205. if version_tuple >= (3, 11):
  206. if bytecode.exception_entries not in (None, []):
  207. exception_table = format_exception_table(bytecode, version_tuple)
  208. real_out.write(exception_table + "\n")
  209. for c in co.co_consts:
  210. if iscode(c):
  211. queue.append(c)
  212. pass
  213. pass
  214. def code_uniquify(basename, co_code) -> str:
  215. # FIXME: better would be a hash of the co_code
  216. return "%s_0x%x" % (basename, id(co_code))
def disco_loop_asm_format(opc, version_tuple, co, real_out, fn_name_map, all_fns) -> None:
    """Produces disassembly in a format more conducive to
    automatic assembly by producing inner modules before they are
    used by outer ones. Since this is recursive, we'll
    use more stack space at runtime.

    ``fn_name_map`` (a dict) and ``all_fns`` (a set) are shared across
    the recursive calls and are updated in place: ``fn_name_map`` records
    each mapped name chosen here together with the name it was derived
    from, and ``all_fns`` collects the names of code objects written so
    far. Output is written to ``real_out``.
    """
    # NOTE(review): the nesting of a few statements below was reconstructed
    # from a listing without indentation -- confirm against upstream.
    co = codeType2Portable(co)
    co_name = co.co_name
    mapped_name = fn_name_map.get(co_name, co_name)
    new_consts = []
    for c in co.co_consts:
        if iscode(c):
            if isinstance(c, types.CodeType):
                c_compat = codeType2Portable(c)
            else:
                c_compat = c
            # Emit the nested code object before the code that uses it.
            disco_loop_asm_format(
                opc, version_tuple, c_compat, real_out, fn_name_map, all_fns
            )
            # Pick out a name wrapped in angle brackets from str(c).
            m = re.match(".* object <(.+)> at", str(c))
            if m:
                basename = m.group(1)
                if basename != "module":
                    mapped_name = code_uniquify(basename, c.co_code)
                    c_compat.co_name = mapped_name
            c_compat.freeze()
            new_consts.append(c_compat)
        else:
            new_consts.append(c)
        pass
    co.co_consts = new_consts
    # Names of the form "<...>", and names already written, get a
    # uniquified replacement.
    m = re.match("^<(.+)>$", co.co_name)
    if m is not None or co.co_name in all_fns:
        if co.co_name in all_fns:
            basename = co_name
        else:
            basename = m.group(1)
        if basename != "module":
            mapped_name = code_uniquify(basename, co.co_code)
            co_name = mapped_name
            if mapped_name in fn_name_map:
                # We can have two lambdas created that are the same
                # but have different line numbers.
                mapped_name += f"_{str(co.co_firstlineno)}"
        fn_name_map[mapped_name] = basename
        co.co_name = mapped_name
        pass
    elif co_name in fn_name_map:
        # FIXME: better would be a hash of the co_code
        mapped_name = code_uniquify(co_name, co.co_code)
        fn_name_map[mapped_name] = co_name
        co.co_name = mapped_name
        pass
    else:
        mapped_name = co.co_name
    co = co.freeze()
    all_fns.add(co_name)
    # Code information is skipped only for a "<module>" code object that
    # has no file name.
    if co.co_name != "<module>" or co.co_filename:
        real_out.write("\n" + format_code_info(co, version_tuple, mapped_name) + "\n")
    bytecode = Bytecode(co, opc, dup_lines=True)
    real_out.write(bytecode.dis(asm_format="asm") + "\n")
  278. def disassemble_file(
  279. filename: str,
  280. outstream=sys.stdout,
  281. asm_format="classic",
  282. alternate_opmap=None,
  283. show_source=False,
  284. methods: Tuple[str] = tuple()
  285. ):
  286. """
  287. Disassemble Python byte-code file (.pyc).
  288. If given a Python source file (".py") file, we'll
  289. try to find the corresponding compiled object.
  290. If that fails, we'll compile internally for the Python version currently running.
  291. """
  292. pyc_filename = None
  293. try:
  294. # FIXME: add whether we want PyPy
  295. pyc_filename = check_object_path(filename)
  296. (
  297. version_tuple,
  298. timestamp,
  299. magic_int,
  300. co,
  301. is_pypy,
  302. source_size,
  303. sip_hash,
  304. ) = load_module(pyc_filename)
  305. except (ImportError, NotImplementedError, ValueError):
  306. raise
  307. except Exception:
  308. # Hack alert: we're using pyc_filename set as a proxy for whether the filename exists.
  309. # check_object_path() will succeed if the file exists.
  310. if pyc_filename is None:
  311. raise
  312. stat = os.stat(filename)
  313. source = open(filename, "r").read()
  314. co = compile(source, filename, "exec")
  315. is_pypy = IS_PYPY
  316. magic_int = PYTHON_MAGIC_INT
  317. sip_hash = 0
  318. source_size = stat.st_size
  319. timestamp = stat.st_mtime
  320. version_tuple = PYTHON_VERSION_TRIPLE
  321. else:
  322. filename = pyc_filename
  323. is_graal = magic_int in GRAAL3_MAGICS
  324. if asm_format == "header":
  325. show_module_header(
  326. version_tuple,
  327. co,
  328. timestamp,
  329. outstream,
  330. is_pypy,
  331. magic_int,
  332. source_size,
  333. sip_hash,
  334. header=True,
  335. show_filename=True,
  336. is_graal=is_graal,
  337. )
  338. else:
  339. disco(
  340. version_tuple=version_tuple,
  341. co=co,
  342. timestamp=timestamp,
  343. out=outstream,
  344. is_pypy=is_pypy,
  345. magic_int=magic_int,
  346. source_size=source_size,
  347. sip_hash=sip_hash,
  348. asm_format=asm_format,
  349. alternate_opmap=alternate_opmap,
  350. show_source=show_source,
  351. is_graal=is_graal,
  352. methods=methods,
  353. )
  354. # print co.co_filename
  355. return (
  356. filename,
  357. co,
  358. version_tuple,
  359. timestamp,
  360. magic_int,
  361. is_pypy,
  362. source_size,
  363. sip_hash,
  364. )
  365. def not_filtered(co: types.CodeType, methods: tuple) -> bool:
  366. return len(methods) == 0 or co.co_name in methods
  367. def _test() -> None:
  368. """Simple test program to disassemble a file."""
  369. argc = len(sys.argv)
  370. if argc == 1:
  371. if xdis.PYTHON3:
  372. disassemble_file(__file__)
  373. disassemble_file(__file__, methods=("code_uniquify",))
  374. else:
  375. sys.stderr.write(f"usage: {__file__} [-|CPython compiled file [format]]\n")
  376. sys.exit(2)
  377. elif argc == 3:
  378. fn, asm_format = sys.argv[1:3]
  379. disassemble_file(fn, asm_format=asm_format)
  380. else:
  381. fn = sys.argv[1]
  382. disassemble_file(fn)
# Run the simple self-test only when this file is executed as a script.
if __name__ == "__main__":
    _test()