bytecode.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. # Copyright (c) 2018-2025 by Rocky Bernstein
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. """
  17. Python bytecode and instruction classes Extracted from Python 3
  18. "dis" module but generalized to allow running on multiple code types
  19. including those from Python 1.x and 2.x.
  20. """
  21. import collections
  22. import inspect
  23. import sys
  24. from io import StringIO
  25. from linecache import getline
  26. from types import CodeType
  27. from typing import Iterable, Iterator, Optional, Tuple, Union
  28. from xdis.cross_dis import (
  29. format_code_info,
  30. get_code_object,
  31. instruction_size,
  32. op_has_argument,
  33. )
  34. from xdis.cross_types import UnicodeForPython3
  35. from xdis.instruction import Instruction
  36. from xdis.op_imports import get_opcode_module
  37. from xdis.opcodes.opcode_36 import format_CALL_FUNCTION, format_CALL_FUNCTION_EX
  38. from xdis.util import code2num, num2code
  39. from xdis.version_info import IS_PYPY
# Opcode-module variant passed to get_opcode_module() when an opcode module
# has to be chosen for the running interpreter: "pypy" under PyPy, else None.
VARIANT = "pypy" if IS_PYPY else None
  41. def get_docstring(filename: str, line_number: int, doc_str: str) -> str:
  42. while len(doc_str) < 80:
  43. next_line = getline(filename, line_number).strip()
  44. doc_str += "\\n" + next_line
  45. if next_line.endswith('"""'):
  46. break
  47. line_number += 1
  48. if len(doc_str) > 80:
  49. doc_str = doc_str[:-7] + '... """'
  50. return doc_str + "\n"
  51. def get_jump_val(jump_arg: int, version: tuple) -> int:
  52. return jump_arg * 2 if version[:2] >= (3, 10) else jump_arg
  53. def get_const_info(const_index, const_list):
  54. """Helper to get optional details about const references
  55. Returns the dereferenced constant and its repr if the constant
  56. list is defined.
  57. Otherwise, returns the constant index and its repr().
  58. """
  59. arg_val = const_index
  60. if const_list is not None:
  61. arg_val = const_list[const_index]
  62. arg_repr = (
  63. prefer_double_quote(repr(arg_val))
  64. if isinstance(arg_val, str)
  65. else repr(arg_val)
  66. )
  67. # Float values "nan" and "inf" are not directly representable in Python at least
  68. # before 3.5 and even there it is via a library constant.
  69. # So we will canonicalize their representation as float('nan') and float('inf')
  70. if isinstance(arg_val, float) and str(arg_val) in frozenset(
  71. ["nan", "-nan", "inf", "-inf"]
  72. ):
  73. return arg_val, f"float('{arg_val}')"
  74. return arg_val, arg_repr
  75. # For compatibility
  76. _get_const_info = get_const_info
  77. def get_name_info(name_index, name_list):
  78. """Helper to get optional details about named references
  79. Returns the dereferenced name as both value and repr if the name
  80. list is defined.
  81. Otherwise, returns the name index and its repr().
  82. """
  83. argval = name_index
  84. if (
  85. name_list is not None
  86. # PyPY seems to "optimize" out constant names,
  87. # so we need the following condition to handle this situation:
  88. and name_index < len(name_list)
  89. ):
  90. argval = name_list[name_index]
  91. argrepr = argval
  92. else:
  93. argrepr = repr(argval)
  94. return argval, argrepr
  95. def get_optype(opcode: int, opc) -> str:
  96. """Helper to determine what class of instructions ``opcode`` is in.
  97. Return is a string in:
  98. compare, const, free, jabs, jrel, local, name, nargs, or ??
  99. """
  100. if opcode in opc.COMPARE_OPS:
  101. return "compare"
  102. elif opcode in opc.CONST_OPS:
  103. return "const"
  104. elif opcode in opc.FREE_OPS:
  105. return "free"
  106. elif opcode in opc.JABS_OPS:
  107. return "jabs"
  108. elif opcode in opc.JREL_OPS:
  109. return "jrel"
  110. elif opcode in opc.LOCAL_OPS:
  111. return "local"
  112. elif opcode in opc.NAME_OPS:
  113. return "name"
  114. elif opcode in opc.NARGS_OPS:
  115. return "nargs"
  116. # This has to come after NARGS_OPS. Some are in both?
  117. elif opcode in opc.VARGS_OPS:
  118. return "vargs"
  119. elif opcode in opc.ENCODED_ARG_OPS:
  120. return "encoded_arg"
  121. return "??"
# For compatibility: keep the underscore-prefixed name available as an alias;
# code in this module refers to the helper by this name.
_get_name_info = get_name_info
  124. def offset2line(offset: int, linestarts):
  125. """linestarts is expected to be a *list of (offset, line number)
  126. where both offset and line number are in increasing order.
  127. Return the closest line number at or below the offset.
  128. If offset is less than the first line number given in `linestarts`,
  129. return line number 0.
  130. """
  131. if len(linestarts) == 0 or offset < linestarts[0][0]:
  132. return 0
  133. low = 0
  134. high = len(linestarts) - 1
  135. mid = (low + high + 1) // 2
  136. while low <= high:
  137. if linestarts[mid][0] > offset:
  138. high = mid - 1
  139. elif linestarts[mid][0] < offset:
  140. low = mid + 1
  141. else:
  142. return linestarts[mid][1]
  143. mid = (low + high + 1) // 2
  144. pass
  145. # Not found. Return the closest position below.
  146. if mid >= len(linestarts):
  147. return linestarts[len(linestarts) - 1][1]
  148. return linestarts[high][1]
  149. def _parse_varint(iterator: Iterator[int]) -> int:
  150. b = next(iterator)
  151. val = b & 63
  152. while b & 64:
  153. val <<= 6
  154. b = next(iterator)
  155. val |= b & 63
  156. return val
# One decoded row of a code object's exception table, as produced by
# parse_exception_table(): the covered range [start, end), the handler
# ``target`` offset, the ``depth`` value and the boolean ``lasti`` flag.
_ExceptionTableEntry = collections.namedtuple(
    "_ExceptionTableEntry", "start end target depth lasti"
)
  160. def parse_exception_table(exception_table: bytes):
  161. iterator = iter(exception_table)
  162. entries = []
  163. try:
  164. while True:
  165. start = _parse_varint(iterator) * 2
  166. length = _parse_varint(iterator) * 2
  167. end = start + length
  168. target = _parse_varint(iterator) * 2
  169. dl = _parse_varint(iterator)
  170. depth = dl >> 1
  171. lasti = bool(dl & 1)
  172. entries.append(_ExceptionTableEntry(start, end, target, depth, lasti))
  173. except StopIteration:
  174. return entries
  175. def prefer_double_quote(string: str) -> str:
  176. """
  177. Prefer a double-quoted string over a single-quoted string when
  178. possible. ``string`` is expected to already be a repr()-like
  179. representation with quoting already in it.
  180. Python formatting seems now to prefer double-quotes, even though
  181. it's repr() function typically prefers single quotes.
  182. Using the form that Python typically uses in its source can
  183. make things easier on users of this, like decompilers.
  184. """
  185. if string[1:-1].find('"') == -1:
  186. return f'"{string[1:-1]}"'
  187. return string
  188. def get_logical_instruction_at_offset(
  189. bytecode,
  190. offset: int,
  191. opc,
  192. varnames=None,
  193. names=None,
  194. constants=None,
  195. cells=None,
  196. linestarts=None,
  197. line_offset=0,
  198. exception_entries=None,
  199. labels=None,
  200. ):
  201. """
  202. Return a single logical instruction for `bytecode` at offset `offset`.
  203. if the opcode at offset is EXTENDED_ARG, then instructions are returned
  204. until we no longer have an EXTENDED_ARG instruction. Note that the
  205. last non-EXTENDED_ARG instruction will have its argument value adjusted
  206. to note the increased size of the argument.
  207. """
  208. if labels is None:
  209. labels = opc.findlabels(bytecode, opc)
  210. # PERFORMANCE FIX: Only add exception labels if we're building labels ourselves
  211. # When called from get_instructions_bytes, labels already includes exception targets
  212. if exception_entries is not None:
  213. for start, end, target, _, _ in exception_entries:
  214. if target not in labels:
  215. labels.append(target)
  216. # label_maps = get_jump_target_maps(bytecode, opc)
  217. # FIXME: We really need to distinguish 3.6.0a1 from 3.6.a3.
  218. # See below FIXME
  219. python_36 = True if opc.python_version >= (3, 6) else False
  220. starts_line = None
  221. n = len(bytecode)
  222. extended_arg_count = 0
  223. extended_arg = 0
  224. if hasattr(opc, "EXTENDED_ARG"):
  225. extended_arg_size = instruction_size(opc.EXTENDED_ARG, opc)
  226. else:
  227. extended_arg_size = 0
  228. # This is not necessarily true initially, but it gets us through the
  229. # loop below.
  230. last_op_was_extended_arg = True
  231. i = offset
  232. while i < n and last_op_was_extended_arg:
  233. op = code2num(bytecode, i)
  234. opname = opc.opname[op]
  235. optype = get_optype(op, opc)
  236. offset = i
  237. if linestarts is not None:
  238. starts_line = linestarts.get(i, None)
  239. if starts_line is not None:
  240. starts_line += line_offset
  241. is_jump_target = i in labels
  242. i += 1
  243. arg = None
  244. argval = None
  245. argrepr = ""
  246. has_arg = op_has_argument(op, opc)
  247. if has_arg:
  248. if python_36:
  249. arg = code2num(bytecode, i) | extended_arg
  250. extended_arg = (arg << 8) if opname == "EXTENDED_ARG" else 0
  251. # FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
  252. i += 1
  253. else:
  254. arg = (
  255. code2num(bytecode, i)
  256. + code2num(bytecode, i + 1) * 0x100
  257. + extended_arg
  258. )
  259. i += 2
  260. extended_arg = arg * 0x10000 if opname == "EXTENDED_ARG" else 0
  261. # Set argval to the dereferenced value of the argument when
  262. # available, and argrepr to the string representation of argval.
  263. # disassemble_bytes needs the string repr of the
  264. # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
  265. argval = arg
  266. # create a localsplusnames table that resolves duplicates.
  267. localsplusnames = (varnames or tuple()) + tuple(
  268. name for name in (cells or tuple()) if name not in varnames
  269. )
  270. if op in opc.CONST_OPS:
  271. argval, argrepr = _get_const_info(arg, constants)
  272. elif op in opc.NAME_OPS:
  273. if opc.version_tuple >= (3, 11) and opname == "LOAD_GLOBAL":
  274. argval, argrepr = _get_name_info(arg >> 1, names)
  275. if arg & 1:
  276. argrepr = "NULL + " + argrepr
  277. elif opc.version_tuple >= (3, 12) and opname == "LOAD_ATTR":
  278. argval, argrepr = _get_name_info(arg >> 1, names)
  279. if arg & 1:
  280. argrepr = "NULL|self + " + argrepr
  281. elif opc.version_tuple >= (3, 12) and opname == "LOAD_SUPER_ATTR":
  282. argval, argrepr = _get_name_info(arg >> 2, names)
  283. if arg & 1:
  284. argrepr = "NULL|self + " + argrepr
  285. else:
  286. argval, argrepr = _get_name_info(arg, names)
  287. elif op in opc.JREL_OPS:
  288. signed_arg = -arg if "JUMP_BACKWARD" in opname else arg
  289. argval = i + get_jump_val(signed_arg, opc.python_version)
  290. # check cache instructions for python 3.13
  291. if opc.version_tuple >= (3, 13):
  292. if opc.opname[op] in [
  293. "POP_JUMP_IF_TRUE",
  294. "POP_JUMP_IF_FALSE",
  295. "POP_JUMP_IF_NONE",
  296. "POP_JUMP_IF_NOT_NONE",
  297. "JUMP_BACKWARD",
  298. ]:
  299. argval += 2
  300. # FOR_ITER has a cache instruction in 3.12
  301. if opc.version_tuple >= (3, 12) and opname == "FOR_ITER":
  302. argval += 2
  303. argrepr = "to " + repr(argval)
  304. elif op in opc.JABS_OPS:
  305. argval = get_jump_val(arg, opc.python_version)
  306. argrepr = "to " + repr(argval)
  307. elif op in opc.LOCAL_OPS:
  308. if opc.version_tuple >= (3, 13) and opname in (
  309. "LOAD_FAST_LOAD_FAST",
  310. "STORE_FAST_LOAD_FAST",
  311. "STORE_FAST_STORE_FAST",
  312. ):
  313. arg1 = arg >> 4
  314. arg2 = arg & 15
  315. argval1, argrepr1 = _get_name_info(arg1, localsplusnames)
  316. argval2, argrepr2 = _get_name_info(arg2, localsplusnames)
  317. argval = argval1, argval2
  318. argrepr = argrepr1 + ", " + argrepr2
  319. elif opc.version_tuple >= (3, 11):
  320. argval, argrepr = _get_name_info(arg, localsplusnames)
  321. else:
  322. argval, argrepr = _get_name_info(arg, varnames)
  323. elif op in opc.FREE_OPS:
  324. if opc.version_tuple >= (3, 11):
  325. argval, argrepr = _get_name_info(arg, localsplusnames)
  326. else:
  327. argval, argrepr = _get_name_info(arg, cells)
  328. elif op in opc.COMPARE_OPS:
  329. if opc.python_version >= (3, 13):
  330. # The fifth-lowest bit of the oparg now indicates a forced conversion to bool.
  331. argval = opc.cmp_op[arg >> 5]
  332. elif opc.python_version >= (3, 12):
  333. argval = opc.cmp_op[arg >> 4]
  334. else:
  335. argval = opc.cmp_op[arg]
  336. argrepr = argval
  337. elif op in opc.NARGS_OPS:
  338. opname = opname
  339. if python_36 and opname in ("CALL_FUNCTION", "CALL_FUNCTION_EX"):
  340. if opname == "CALL_FUNCTION":
  341. argrepr = format_CALL_FUNCTION(code2num(bytecode, i - 1))
  342. else:
  343. assert opname == "CALL_FUNCTION_EX"
  344. argrepr = format_CALL_FUNCTION_EX(code2num(bytecode, i - 1))
  345. else:
  346. if not (
  347. python_36
  348. or opname in ("RAISE_VARARGS", "DUP_TOPX", "MAKE_FUNCTION")
  349. ):
  350. argrepr = "%d positional, %d named" % (
  351. code2num(bytecode, i - 2),
  352. code2num(bytecode, i - 1),
  353. )
  354. if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
  355. argrepr = opc.opcode_arg_fmt[opname](arg)
  356. else:
  357. if python_36:
  358. i += 1
  359. if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
  360. argrepr = opc.opcode_arg_fmt[opname](arg)
  361. inst_size = instruction_size(op, opc) + (extended_arg_count * extended_arg_size)
  362. start_offset = offset if opc.oppop[op] == 0 else None
  363. yield Instruction(
  364. is_jump_target=is_jump_target,
  365. starts_line=starts_line,
  366. offset=offset,
  367. opname=opname,
  368. opcode=op,
  369. has_arg=has_arg,
  370. arg=arg,
  371. argval=argval,
  372. argrepr=argrepr,
  373. tos_str=None,
  374. positions=None,
  375. optype=optype,
  376. inst_size=inst_size,
  377. has_extended_arg=extended_arg_count != 0,
  378. fallthrough=None,
  379. start_offset=start_offset,
  380. )
  381. # fallthrough
  382. last_op_was_extended_arg = True if opname == "EXTENDED_ARG" else False
  383. extended_arg_count = extended_arg_count + 1 if last_op_was_extended_arg else 0
  384. # end loop
  385. def next_offset(op: int, opc, offset: int) -> int:
  386. """Returns the bytecode offset for the instruction that is assumed to
  387. start at `offset` and has opcode `op`. opc contains information for the
  388. bytecode version of that we should be using.
  389. """
  390. return offset + instruction_size(op, opc)
  391. def get_instructions_bytes(
  392. bytecode,
  393. opc,
  394. varnames=None,
  395. names=None,
  396. constants=None,
  397. cells=None,
  398. linestarts=None,
  399. line_offset=0,
  400. exception_entries=None,
  401. ):
  402. """
  403. Iterate over the instructions in a bytecode string.
  404. Generates a sequence of Instruction namedtuples giving the details of each
  405. opcode. Additional information about the code's runtime environment
  406. e.g., variable names, constants, can be specified using optional
  407. arguments.
  408. """
  409. labels = opc.findlabels(bytecode, opc)
  410. # PERFORMANCE FIX: Build exception labels ONCE, not on every iteration
  411. # The old code was O(n^2) because it rebuilt the same list every call to
  412. # get_logical_instruction_at_offset
  413. if exception_entries is not None:
  414. for start, end, target, _, _ in exception_entries:
  415. # Only add the target offset, not every offset in the range
  416. # This matches what get_logical_instruction_at_offset expects
  417. if target not in labels:
  418. labels.append(target)
  419. n = len(bytecode)
  420. offset = 0
  421. while offset < n:
  422. instructions = list(
  423. get_logical_instruction_at_offset(
  424. bytecode,
  425. offset,
  426. opc,
  427. varnames=varnames,
  428. names=names,
  429. constants=constants,
  430. cells=cells,
  431. linestarts=linestarts,
  432. line_offset=0,
  433. exception_entries=exception_entries,
  434. labels=labels
  435. )
  436. )
  437. for instruction in instructions:
  438. yield instruction
  439. offset = next_offset(instruction.opcode, opc, instruction.offset)
  440. class Bytecode:
  441. """Bytecode operations involving a Python code object.
  442. Instantiate this with a function, method, string of code, or a code object
  443. (as returned by compile()).
  444. Iterating over these yields the bytecode operations as Instruction instances.
  445. """
  446. def __init__(self, x, opc, first_line=None, current_offset=None, dup_lines: bool=True) -> None:
  447. self.codeobj = co = get_code_object(x)
  448. self._line_offset = 0
  449. self._cell_names = ()
  450. if opc.version_tuple >= (1, 5):
  451. if first_line is None:
  452. self.first_line = co.co_firstlineno
  453. else:
  454. self.first_line = first_line
  455. self._line_offset = first_line - co.co_firstlineno
  456. if opc.version_tuple > (2, 0):
  457. self._cell_names = co.co_cellvars + co.co_freevars
  458. pass
  459. pass
  460. self._linestarts = dict(opc.findlinestarts(co, dup_lines=dup_lines))
  461. self._original_object = x
  462. self.opc = opc
  463. self.opnames = opc.opname
  464. self.current_offset = current_offset
  465. if opc.version_tuple >= (3, 11) and not opc.is_pypy and hasattr(co, "co_exceptiontable"):
  466. self.exception_entries = parse_exception_table(co.co_exceptiontable)
  467. else:
  468. self.exception_entries = None
  469. def __iter__(self):
  470. co = self.codeobj
  471. return get_instructions_bytes(
  472. co.co_code,
  473. self.opc,
  474. co.co_varnames,
  475. co.co_names,
  476. co.co_consts,
  477. self._cell_names,
  478. self._linestarts,
  479. line_offset=self._line_offset,
  480. exception_entries=self.exception_entries,
  481. )
  482. def __repr__(self) -> str:
  483. return f"{self.__class__.__name__}({self._original_object!r})"
  484. @classmethod
  485. def from_traceback(cls, tb, opc=None):
  486. """Construct a Bytecode from the given traceback"""
  487. if opc is None:
  488. opc = get_opcode_module(sys.version_info, VARIANT)
  489. while tb.tb_next:
  490. tb = tb.tb_next
  491. return cls(
  492. tb.tb_frame.f_code, opc=opc, first_line=None, current_offset=tb.tb_lasti
  493. )
  494. def info(self) -> str:
  495. """Return formatted information about the code object."""
  496. return format_code_info(self.codeobj, self.opc.version_tuple)
  497. def dis(self, asm_format: str="classic", show_source: bool=False) -> str:
  498. """Return a formatted view of the bytecode operations."""
  499. co = self.codeobj
  500. filename = co.co_filename
  501. if self.current_offset is not None:
  502. offset = self.current_offset
  503. else:
  504. offset = -1
  505. output = StringIO()
  506. if self.opc.version_tuple > (2, 0):
  507. cells = self._cell_names
  508. line_starts = self._linestarts
  509. else:
  510. cells = None
  511. line_starts = None
  512. first_line_number = co.co_firstlineno if hasattr(co, "co_firstlineno") else None
  513. if inspect.iscode(co):
  514. filename = inspect.getfile(co)
  515. if isinstance(filename, UnicodeForPython3):
  516. filename = str(filename)
  517. self.disassemble_bytes(
  518. co.co_code,
  519. varnames=co.co_varnames,
  520. names=co.co_names,
  521. constants=co.co_consts,
  522. cells=cells,
  523. line_starts=line_starts,
  524. line_offset=self._line_offset,
  525. file=output,
  526. lasti=offset,
  527. asm_format=asm_format,
  528. filename=filename,
  529. show_source=show_source,
  530. first_line_number=first_line_number,
  531. exception_entries=self.exception_entries,
  532. )
  533. return output.getvalue()
  534. def distb(self, tb=None) -> None:
  535. """Disassemble a traceback (default: last traceback)."""
  536. if tb is None:
  537. try:
  538. tb = sys.last_traceback
  539. except AttributeError:
  540. raise RuntimeError("no last traceback to disassemble")
  541. while tb.tb_next:
  542. tb = tb.tb_next
  543. assert tb is not None
  544. self.disassemble_bytes(tb.tb_frame.f_code, tb.tb_lasti)
  545. def disassemble_bytes(
  546. self,
  547. bytecode: Union[CodeType, bytes, str],
  548. lasti: int = -1,
  549. varnames=None,
  550. names=None,
  551. constants=None,
  552. cells=None,
  553. line_starts=None,
  554. file=sys.stdout,
  555. line_offset=0,
  556. asm_format="classic",
  557. filename: Optional[str] = None,
  558. show_source=True,
  559. first_line_number: Optional[int] = None,
  560. exception_entries=None,
  561. ) -> list:
  562. # Omit the line number column entirely if we have no line number info
  563. show_lineno = line_starts is not None or self.opc.version_tuple < (2, 3)
  564. show_source = show_source and show_lineno and first_line_number and filename
  565. def show_source_text(line_number: Optional[int]) -> None:
  566. """
  567. Show the Python source text if all conditions are right:
  568. * source text was requested - this implies other checks
  569. seen above
  570. * the source is available via linecache.getline()
  571. """
  572. # There is some redundancy in the condition below
  573. # to make type checking happy. In reality,
  574. # only the show_source is tested at runtime.
  575. if show_source and filename and line_number:
  576. source_text = getline(filename, line_number).lstrip()
  577. if source_text.startswith('"""'):
  578. source_text = get_docstring(
  579. filename, line_number + 1, source_text.rstrip()
  580. )
  581. if source_text:
  582. file.write(" " * 13 + "# " + source_text)
  583. show_source_text(first_line_number)
  584. # Old Python's use "SET_LINENO" to set a line number
  585. set_lineno_number = 0
  586. last_was_set_lineno = False
  587. # TODO?: Adjust width upwards if max(line_starts.values()) >= 1000?
  588. lineno_width = 3 if show_lineno else 0
  589. instructions = []
  590. extended_arg_starts_line: Optional[int] = None
  591. extended_arg_jump_target_offset: Optional[int] = None
  592. for instr in get_instructions_bytes(
  593. bytecode,
  594. self.opc,
  595. varnames,
  596. names,
  597. constants,
  598. cells,
  599. line_starts,
  600. line_offset=line_offset,
  601. exception_entries=exception_entries,
  602. ):
  603. # Python 1.x into early 2.0 uses SET_LINENO
  604. if last_was_set_lineno:
  605. instr = Instruction(
  606. opcode=instr.opcode,
  607. opname=instr.opname,
  608. arg=instr.arg,
  609. argval=instr.argval,
  610. argrepr=instr.argrepr,
  611. offset=instr.offset,
  612. starts_line=set_lineno_number, # this is the only field that changes
  613. is_jump_target=instr.is_jump_target,
  614. positions=None,
  615. optype=instr.optype,
  616. has_arg=instr.has_arg,
  617. inst_size=instr.inst_size,
  618. has_extended_arg=instr.has_extended_arg,
  619. tos_str=None,
  620. start_offset=None,
  621. )
  622. last_was_set_lineno = False
  623. if instr.opname == "SET_LINENO":
  624. set_lineno_number = instr.argval
  625. last_was_set_lineno = True
  626. if instr.opname == "EXTENDED_ARG" and asm_format == "asm":
  627. extended_arg_starts_line = instr.starts_line
  628. extended_arg_jump_target_offset = instr.offset
  629. continue
  630. if extended_arg_starts_line:
  631. instr = Instruction(
  632. opcode=instr.opcode,
  633. opname=instr.opname,
  634. arg=instr.arg,
  635. argval=instr.argval,
  636. argrepr=instr.argrepr,
  637. offset=instr.offset,
  638. starts_line=extended_arg_starts_line, # this is the only field that changes
  639. is_jump_target=instr.is_jump_target,
  640. positions=instr.positions,
  641. optype=instr.optype,
  642. has_arg=instr.has_arg,
  643. inst_size=instr.inst_size,
  644. has_extended_arg=instr.has_extended_arg,
  645. tos_str=instr.tos_str,
  646. fallthrough=instr.fallthrough,
  647. start_offset=instr.start_offset,
  648. )
  649. extended_arg_starts_line = None
  650. if extended_arg_jump_target_offset is not None:
  651. instr = Instruction(
  652. opcode=instr.opcode,
  653. opname=instr.opname,
  654. arg=instr.arg,
  655. argval=instr.argval,
  656. argrepr=instr.argrepr,
  657. offset=extended_arg_jump_target_offset,
  658. starts_line=extended_arg_starts_line,
  659. is_jump_target=True,
  660. positions=instr.positions,
  661. optype=instr.optype,
  662. has_arg=instr.has_arg,
  663. inst_size=instr.inst_size,
  664. has_extended_arg=instr.has_extended_arg,
  665. tos_str=instr.tos_str,
  666. fallthrough=instr.fallthrough,
  667. start_offset=instr.start_offset,
  668. )
  669. extended_arg_jump_target_offset = None
  670. instructions.append(instr)
  671. new_source_line = show_lineno and (
  672. extended_arg_starts_line
  673. or instr.starts_line is not None
  674. and instr.offset > 0
  675. )
  676. if new_source_line:
  677. file.write("\n")
  678. show_source_text(
  679. extended_arg_starts_line
  680. if extended_arg_starts_line
  681. else instr.starts_line
  682. )
  683. is_current_instr = instr.offset == lasti
  684. # Python 3.11 introduces "CACHE" and the convention seems to be
  685. # to not print these normally.
  686. if instr.opname == "CACHE" and asm_format not in (
  687. "extended_bytes",
  688. "bytes",
  689. ):
  690. continue
  691. file.write(
  692. instr.disassemble(
  693. self.opc,
  694. line_starts,
  695. lineno_width,
  696. is_current_instr,
  697. asm_format,
  698. instructions,
  699. )
  700. + "\n"
  701. )
  702. # Python bytecode before 1.4 has a RESERVE_FAST instruction that
  703. # store STORE_FAST and LOAD_FAST instructions in a different area
  704. # currently we can't track names in this area, but instead use
  705. # locals and hope the two are the same.
  706. if instr.opname == "RESERVE_FAST":
  707. file.write(
  708. "# Warning: subsequent LOAD_FAST and STORE_FAST after RESERVE_FAST "
  709. "are inaccurate here in Python before 1.5\n"
  710. )
  711. pass
  712. return instructions
  713. def get_instructions(self, x, first_line=None):
  714. """Iterator for the opcodes in methods, functions or code
  715. Generates a series of Instruction named tuples giving the details of
  716. each operation in the supplied code.
  717. If *first_line* is not None, it indicates the line number that should
  718. be reported for the first source line in the disassembled code.
  719. Otherwise, the source line information (if any) is taken directly from
  720. the disassembled code object.
  721. """
  722. co = get_code_object(x)
  723. cell_names = co.co_cellvars + co.co_freevars
  724. line_starts = dict(self.opc.findlinestarts(co))
  725. if first_line is not None:
  726. line_offset = first_line - co.co_firstlineno
  727. else:
  728. line_offset = 0
  729. return get_instructions_bytes(
  730. co.co_code,
  731. self.opc,
  732. co.co_varnames,
  733. co.co_names,
  734. co.co_consts,
  735. cell_names,
  736. line_starts,
  737. line_offset,
  738. )
  739. def list2bytecode(inst_list: Iterable, opc, varnames: str, consts: Tuple[None, int]) -> bytes:
  740. """Convert list/tuple of list/tuples to bytecode
  741. _names_ contains a list of name objects
  742. """
  743. bc = []
  744. for i, opcodes in enumerate(inst_list):
  745. opname = opcodes[0]
  746. operands = opcodes[1:]
  747. if opname not in opc.opname:
  748. raise TypeError(
  749. "error at item %d [%s, %s], opcode not valid" % (i, opname, operands)
  750. )
  751. opcode = opc.opmap[opname]
  752. bc.append(opcode)
  753. print(opname, operands)
  754. gen = (j for j in operands if operands)
  755. for j in gen:
  756. k = (consts if opcode in opc.CONST_OPS else varnames).index(j)
  757. if k == -1:
  758. raise TypeError(
  759. f"operand {i} [{opname}, {operands}], not found in names"
  760. )
  761. else:
  762. bc += num2code(k)
  763. pass
  764. pass
  765. pass
  766. return bytes(bc)
  767. if __name__ == "__main__":
  768. import xdis.opcodes.opcode_27 as opcode_27
  769. import xdis.opcodes.opcode_34 as opcode_34
  770. import xdis.opcodes.opcode_36 as opcode_36
  771. from xdis.version_info import PYTHON3
  772. my_constants = (None, 2)
  773. var_names = "a"
  774. instructions = [
  775. ("LOAD_CONST", 2),
  776. ("STORE_FAST", "a"),
  777. ("LOAD_FAST", "a"),
  778. ("RETURN_VALUE",),
  779. ]
  780. def f() -> int:
  781. a = 2
  782. return a
  783. if PYTHON3:
  784. print(f.__code__.co_code)
  785. else:
  786. print(f.func_code.co_code)
  787. bc = list2bytecode(instructions, opcode_27, var_names, my_constants)
  788. print(bc)
  789. bc = list2bytecode(instructions, opcode_34, var_names, my_constants)
  790. print(bc)
  791. bc = list2bytecode(instructions, opcode_36, var_names, my_constants)
  792. print(bc)