cross_dis.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. # (C) Copyright 2020-2021, 2023-2025 by Rocky Bernstein
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. # Here, we are more closely modeling Python's ``dis`` module organization.
  17. # However, it appears that Python's names and code have been copied a bit heavily from
  18. # earlier versions of xdis (and without attribution).
  19. from types import CodeType
  20. from typing import List
  21. from xdis.util import (
  22. COMPILER_FLAG_NAMES,
  23. PYPY_COMPILER_FLAG_NAMES,
  24. better_repr,
  25. code2num,
  26. )
  27. from xdis.version_info import IS_GRAAL
  28. def _try_compile(source: str, name: str) -> CodeType:
  29. """Attempts to compile the given source, first as an expression and
  30. then as a statement if the first approach fails.
  31. Utility function to accept strings in functions that otherwise
  32. expect code objects
  33. """
  34. try:
  35. c = compile(source, name, "eval")
  36. except SyntaxError:
  37. c = compile(source, name, "exec")
  38. return c
  39. def code_info(x, version_tuple, is_pypy=False) -> str:
  40. """Formatted details of methods, functions, or code."""
  41. return format_code_info(get_code_object(x), version_tuple, is_pypy=is_pypy)
  42. def get_code_object(x):
  43. """Helper to handle methods, functions, generators, strings and raw code objects"""
  44. if hasattr(x, "__func__"): # Method
  45. x = x.__func__
  46. if hasattr(x, "__code__"): # Function
  47. x = x.__code__
  48. elif hasattr(x, "func_code"): # Function pre 2.7
  49. x = x.__code__
  50. elif hasattr(x, "gi_code"): # Generator
  51. x = x.gi_code
  52. elif hasattr(x, "ag_code"): # ...an asynchronous generator object, or
  53. x = x.ag_code
  54. elif hasattr(x, "cr_code"): # ...a coroutine.
  55. x = x.cr_code
  56. # Handle source code.
  57. if isinstance(x, str):
  58. x = _try_compile(x, "<disassembly>")
  59. # By now, if we don't have a code object, we can't disassemble x.
  60. if hasattr(x, "co_code"):
  61. return x
  62. raise TypeError("don't know how to disassemble %s objects" % type(x).__name__)
  63. def get_cache_size_313(opname: str) -> int:
  64. _inline_cache_entries = {
  65. "LOAD_GLOBAL": 4,
  66. "BINARY_OP": 1,
  67. "UNPACK_SEQUENCE": 1,
  68. "COMPARE_OP": 1,
  69. "CONTAINS_OP": 1,
  70. "BINARY_SUBSCR": 1,
  71. "FOR_ITER": 1,
  72. "LOAD_SUPER_ATTR": 1,
  73. "LOAD_ATTR": 9,
  74. "STORE_ATTR": 4,
  75. "CALL": 3,
  76. "STORE_SUBSCR": 1,
  77. "SEND": 1,
  78. "JUMP_BACKWARD": 1,
  79. "TO_BOOL": 3,
  80. "POP_JUMP_IF_TRUE": 1,
  81. "POP_JUMP_IF_FALSE": 1,
  82. "POP_JUMP_IF_NONE": 1,
  83. "POP_JUMP_IF_NOT_NONE": 1,
  84. }
  85. return _inline_cache_entries.get(opname, 0)
  86. # For compatibility
  87. _get_cache_size_313 = get_cache_size_313
  88. def findlabels(code: bytes, opc):
  89. if opc.version_tuple < (3, 10) or IS_GRAAL:
  90. return findlabels_pre_310(code, opc)
  91. return findlabels_310(code, opc)
  92. def findlabels_310(code: bytes, opc):
  93. """Returns a list of instruction offsets in the supplied bytecode
  94. which are the targets of some sort of jump instruction.
  95. """
  96. labels = []
  97. for offset, op, arg in unpack_opargs_bytecode_310(code, opc):
  98. if arg is not None:
  99. if op in opc.JREL_OPS:
  100. if opc.version_tuple >= (3, 11) and opc.opname[op] in ("JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT"):
  101. arg = -arg
  102. label = offset + 2 + arg * 2
  103. # in 3.13 we have to add total cache offsets to label
  104. if opc.version_tuple >= (3, 13):
  105. cachesize = _get_cache_size_313(opc.opname[op])
  106. label += 2 * cachesize
  107. elif op in opc.JABS_OPS:
  108. label = arg * 2
  109. else:
  110. continue
  111. if label not in labels:
  112. labels.append(label)
  113. return labels
  114. def findlabels_pre_310(code, opc):
  115. """Returns a list of instruction offsets in the supplied bytecode
  116. which are the targets of some sort of jump instruction.
  117. """
  118. offsets = []
  119. for offset, op, arg in unpack_opargs_bytecode(code, opc):
  120. if arg is not None:
  121. jump_offset = -1
  122. if op in opc.JREL_OPS:
  123. op_len = op_size(op, opc)
  124. jump_offset = offset + op_len + arg
  125. elif op in opc.JABS_OPS:
  126. jump_offset = arg
  127. if jump_offset >= 0:
  128. if jump_offset not in offsets:
  129. offsets.append(jump_offset)
  130. return offsets
  131. # For the `co_lines` attribute, we want to emit the full form, omitting
  132. # the (350, 360, No line number) and empty entries.
  133. NO_LINE_NUMBER = -128
  134. def findlinestarts(code, dup_lines: bool=False):
  135. """Find the offsets in a byte code which are start of lines in the source.
  136. Generate pairs (offset, lineno) as described in Python/compile.c.
  137. """
  138. if hasattr(code, "co_lines"):
  139. # Taken from 3.10 findlinestarts
  140. lastline = None
  141. for start, _, line in code.co_lines():
  142. if line is not None and line != lastline:
  143. lastline = line
  144. yield start, line
  145. else:
  146. lineno_table = code.co_lnotab
  147. if isinstance(lineno_table, dict):
  148. # We have an uncompressed line-number table
  149. # The below could be done with a Python generator, but
  150. # we want to be Python 2.x compatible.
  151. for addr, lineno in lineno_table.items():
  152. yield addr, lineno
  153. # For 3.8 we have to fall through to the return rather
  154. # than add raise StopIteration
  155. elif len(lineno_table) == 0:
  156. yield 0, code.co_firstlineno
  157. else:
  158. if isinstance(lineno_table[0], int):
  159. byte_increments = list(code.co_lnotab[0::2])
  160. line_deltas = list(code.co_lnotab[1::2])
  161. else:
  162. byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
  163. line_deltas = [ord(c) for c in code.co_lnotab[1::2]]
  164. bytecode_len = len(code.co_code)
  165. lastlineno = None
  166. lineno = code.co_firstlineno
  167. offset = 0
  168. byte_incr = 0
  169. for byte_incr, line_delta in zip(byte_increments, line_deltas):
  170. if byte_incr:
  171. if lineno != lastlineno or dup_lines and 0 < byte_incr < 255:
  172. yield offset, lineno
  173. lastlineno = lineno
  174. pass
  175. if offset >= bytecode_len:
  176. # The rest of the ``lnotab byte offsets are past the end of
  177. # the bytecode; any line numbers for these have been removed.
  178. return
  179. offset += byte_incr
  180. pass
  181. if line_delta >= 0x80:
  182. # line_deltas is an array of 8-bit *signed* integers
  183. line_delta -= 0x100
  184. lineno += line_delta
  185. if lineno != lastlineno or (dup_lines and 0 < byte_incr < 255):
  186. yield offset, lineno
  187. return
  188. def instruction_size(op, opc) -> int:
  189. """For a given opcode, `op`, in opcode module `opc`,
  190. return the size, in bytes, of an `op` instruction.
  191. This is the size of the opcode (one byte) and any operand it has.
  192. In Python before version 3.6, this will be either 1 or 3 bytes.
  193. In Python 3.6 or later, it is 2 bytes: a "word"."""
  194. if op < opc.HAVE_ARGUMENT:
  195. return 2 if opc.version_tuple >= (3, 6) else 1
  196. else:
  197. return 2 if opc.version_tuple >= (3, 6) else 3
  198. # Compatibility
  199. op_size = instruction_size
  200. def show_code(co, version_tuple, file=None, is_pypy: bool=False) -> None:
  201. """Print details of methods, functions, or code to *file*.
  202. If *file* is not provided, the output is printed on stdout.
  203. """
  204. if file is None:
  205. print(code_info(co, version_tuple, is_pypy=is_pypy))
  206. else:
  207. file.write(code_info(co, version_tuple) + "\n")
  208. def op_has_argument(opcode: int, opc) -> bool:
  209. """
  210. Return True if `opcode` instruction has an operand.
  211. """
  212. return opcode >= opc.HAVE_ARGUMENT
  213. def pretty_flags(flags, is_pypy=False) -> str:
  214. """Return pretty representation of code flags."""
  215. names = []
  216. result = "0x%08x" % flags
  217. for i in range(32):
  218. flag = 1 << i
  219. if flags & flag:
  220. names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
  221. if is_pypy:
  222. names.append(PYPY_COMPILER_FLAG_NAMES.get(flag, hex(flag)))
  223. flags ^= flag
  224. if not flags:
  225. break
  226. else:
  227. names.append(hex(flags))
  228. names.reverse()
  229. return "%s (%s)" % (result, " | ".join(names))
  230. def format_code_info(
  231. co, version_tuple: tuple, name=None, is_pypy=False, is_graal=False
  232. ) -> str:
  233. if not name:
  234. name = co.co_name
  235. lines = []
  236. if not (name == "?" and version_tuple <= (2, 4)):
  237. lines.append("# Method Name: %s" % name)
  238. # Python before version 2.4 and earlier didn't store a name for the main routine.
  239. # Later versions use "<module>"
  240. lines.append("# Filename: %s" % co.co_filename)
  241. if not is_graal:
  242. if version_tuple >= (1, 3):
  243. lines.append("# Argument count: %s" % co.co_argcount)
  244. if version_tuple >= (3, 8) and hasattr(co, "co_posonlyargcount"):
  245. lines.append("# Position-only argument count: %s" % co.co_posonlyargcount)
  246. if version_tuple >= (3, 0) and hasattr(co, "co_kwonlyargcount"):
  247. lines.append("# Keyword-only arguments: %s" % co.co_kwonlyargcount)
  248. pos_argc = co.co_argcount
  249. if version_tuple >= (1, 3):
  250. lines.append("# Number of locals: %s" % co.co_nlocals)
  251. if version_tuple >= (1, 5):
  252. lines.append("# Stack size: %s" % co.co_stacksize)
  253. pass
  254. pass
  255. else:
  256. pos_argc = 0
  257. if version_tuple >= (1, 3):
  258. lines.append(
  259. "# Flags: %s" % pretty_flags(co.co_flags, is_pypy=is_pypy)
  260. )
  261. if version_tuple >= (1, 5):
  262. lines.append("# First Line: %s" % co.co_firstlineno)
  263. # if co.co_freevars:
  264. # lines.append("# Freevars: %s" % str(co.co_freevars))
  265. if co.co_consts:
  266. lines.append("# Constants:")
  267. for i, c in enumerate(co.co_consts):
  268. lines.append("# %4d: %s" % (i, better_repr(c)))
  269. if co.co_names:
  270. lines.append("# Names:")
  271. for i_n in enumerate(co.co_names):
  272. lines.append("# %4d: %s" % i_n)
  273. if co.co_varnames:
  274. lines.append("# Varnames:")
  275. lines.append("#\t%s" % ", ".join(co.co_varnames))
  276. pass
  277. if pos_argc > 0:
  278. lines.append("# Positional arguments:")
  279. lines.append("#\t%s" % ", ".join(co.co_varnames[:pos_argc]))
  280. pass
  281. if len(co.co_varnames) > pos_argc:
  282. lines.append("# Local variables:")
  283. for i, n in enumerate(co.co_varnames[pos_argc:]):
  284. lines.append("# %4d: %s" % (pos_argc + i, n))
  285. if version_tuple > (2, 0):
  286. if co.co_freevars:
  287. lines.append("# Free variables:")
  288. for i_n in enumerate(co.co_freevars):
  289. lines.append("# %4d: %s" % i_n)
  290. pass
  291. pass
  292. if co.co_cellvars:
  293. lines.append("# Cell variables:")
  294. for i_n in enumerate(co.co_cellvars):
  295. lines.append("# %4d: %s" % i_n)
  296. pass
  297. pass
  298. return "\n".join(lines)
  299. def format_exception_table(bytecode, version_tuple) -> str:
  300. if version_tuple < (3, 11) or not hasattr(bytecode, "exception_entries"):
  301. return ""
  302. lines: List[str] = ["ExceptionTable:"]
  303. for entry in bytecode.exception_entries:
  304. lasti = " lasti" if entry.lasti else ""
  305. end = entry.end - 2
  306. lines.append(
  307. f" {entry.start} to {end} -> {entry.target} [{entry.depth}]{lasti}"
  308. )
  309. return "\n".join(lines)
  310. def extended_arg_val(opc, val):
  311. return val << opc.EXTENDED_ARG_SHIFT
  312. def unpack_opargs_bytecode_310(code: bytes, opc):
  313. extended_arg = 0
  314. try:
  315. n = len(code)
  316. except TypeError:
  317. code = code.co_code
  318. n = len(code)
  319. for offset in range(0, n, 2):
  320. op = code2num(code, offset)
  321. if op_has_argument(op, opc):
  322. arg = code2num(code, offset + 1) | extended_arg
  323. extended_arg = extended_arg_val(opc, arg) if op == opc.EXTENDED_ARG else 0
  324. else:
  325. arg = None
  326. yield offset, op, arg
  327. # This is modified from Python 3.6's ``dis`` module
  328. def unpack_opargs_bytecode(code, opc):
  329. extended_arg = 0
  330. try:
  331. n = len(code)
  332. except TypeError:
  333. code = code.co_code
  334. n = len(code)
  335. offset = 0
  336. while offset < n:
  337. prev_offset = offset
  338. op = code2num(code, offset)
  339. offset += 1
  340. if op_has_argument(op, opc):
  341. arg = code2num(code, offset) | extended_arg
  342. extended_arg = (
  343. extended_arg_val(opc, arg)
  344. if hasattr(opc, "EXTENDED_ARG") and op == opc.EXTENDED_ARG
  345. else 0
  346. )
  347. offset += 2
  348. else:
  349. arg = None
  350. yield prev_offset, op, arg
  351. def get_jump_target_maps(code, opc):
  352. """Returns a dictionary where the key is an offset and the values are
  353. a list of instruction offsets which can get run before that
  354. instruction. This includes jump instructions as well as non-jump
  355. instructions. Therefore, the keys of the dictionary are reachable
  356. instructions. The values of the dictionary may be useful in control-flow
  357. analysis.
  358. """
  359. offset2prev = {}
  360. prev_offset = -1
  361. for offset, op, arg in unpack_opargs_bytecode(code, opc):
  362. if prev_offset >= 0:
  363. prev_list = offset2prev.get(offset, [])
  364. prev_list.append(prev_offset)
  365. offset2prev[offset] = prev_list
  366. if op in opc.NOFOLLOW:
  367. prev_offset = -1
  368. else:
  369. prev_offset = offset
  370. if arg is not None:
  371. jump_offset = -1
  372. if op in opc.JREL_OPS:
  373. op_len = op_size(op, opc)
  374. jump_offset = offset + op_len + arg
  375. elif op in opc.JABS_OPS:
  376. jump_offset = arg
  377. if jump_offset >= 0:
  378. prev_list = offset2prev.get(jump_offset, [])
  379. prev_list.append(offset)
  380. offset2prev[jump_offset] = prev_list
  381. return offset2prev
  382. # In CPython, this is C code. We redo this in Python using the
  383. # information in opc.
  384. def xstack_effect(opcode, opc, oparg: int = 0, jump=None):
  385. """Compute the stack effect of opcode with argument oparg, using
  386. oppush and oppop tables in opc.
  387. If the code has a jump target and jump is True, stack_effect()
  388. will return the stack effect of jumping. If jump is False, it will
  389. return the stack effect of not jumping. And if jump is None
  390. (default), it will return the maximal stack effect of both cases.
  391. """
  392. version_tuple = opc.version_tuple
  393. pop, push = opc.oppop[opcode], opc.oppush[opcode]
  394. opname = opc.opname[opcode]
  395. if opname in "BUILD_CONST_KEY_MAP" and version_tuple >= (3, 12):
  396. return -oparg
  397. if opname == "BUILD_MAP" and version_tuple >= (3, 5):
  398. return 1 - (2 * oparg)
  399. elif opname in ("UNPACK_SEQUENCE", "UNPACK_EX") and version_tuple >= (3, 0):
  400. return push + oparg
  401. elif opname in (
  402. "BUILD_LIST",
  403. "BUILD_SET",
  404. "BUILD_STRING",
  405. "BUILD_TUPLE",
  406. ) and version_tuple >= (3, 12):
  407. return 1 - oparg
  408. elif opname in ("BUILD_SLICE") and version_tuple <= (2, 7):
  409. return -2 if oparg == 3 else -1
  410. elif opname == "LOAD_ATTR" and version_tuple >= (3, 12):
  411. return 1 if oparg & 1 else 0
  412. elif opname == "MAKE_FUNCTION":
  413. if version_tuple >= (3, 5):
  414. if 0 <= oparg <= 10:
  415. if version_tuple == (3, 5):
  416. return [-1, -2, -3, -3, -2, -3, -3, -4, -2, -3, -3, -4][oparg]
  417. elif (3, 6) <= version_tuple < (3, 11):
  418. return [-1, -2, -2, -3, -2, -3, -3, -4, -2, -3, -3, -4][oparg]
  419. elif 0 <= oparg <= 2:
  420. return [0, -1, -1][oparg]
  421. else:
  422. return None
  423. else:
  424. return None
  425. elif opname == "CALL" and version_tuple >= (3, 12):
  426. return -oparg - 1
  427. elif opname == "CALL_KW":
  428. return -2 - oparg
  429. elif opname == "CALL_FUNCTION_EX":
  430. if (3, 5) <= version_tuple < (3, 11):
  431. return -2 if oparg & 1 else -1
  432. elif 0 <= oparg <= 3:
  433. return -3 if oparg & 1 else -2
  434. else:
  435. return None
  436. elif opname in (
  437. "INSTRUMENTED_LOAD_SUPER_ATTR",
  438. "LOAD_SUPER_ATTR",
  439. ) and version_tuple >= (3, 12):
  440. return -1 if oparg & 1 else -2
  441. elif opname == "LOAD_GLOBAL" and version_tuple >= (3, 11):
  442. return 2 if oparg & 1 else 1
  443. elif opname == "PRECALL" and version_tuple >= (3, 11):
  444. return -oparg
  445. elif opname == "RAISE_VARARGS" and version_tuple >= (3, 12):
  446. return -oparg
  447. if push >= 0 and pop >= 0:
  448. return push - pop
  449. elif pop < 0:
  450. # The amount popped depends on oparg, and opcode class
  451. if opcode in opc.VARGS_OPS:
  452. return push - oparg + (pop + 1)
  453. elif opcode in opc.NARGS_OPS:
  454. return -oparg + pop + push
  455. return -100
  456. if __name__ == "__main__":
  457. from dis import findlabels as findlabels_std
  458. my_code = findlabels.__code__.co_code
  459. from xdis.op_imports import get_opcode_module
  460. my_opc = get_opcode_module()
  461. assert findlabels(my_code, my_opc) == findlabels_std(my_code)