| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902 |
- # Copyright (c) 2018-2025 by Rocky Bernstein
- #
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- """
Python bytecode and instruction classes, extracted from the Python 3
"dis" module but generalized to run on multiple code types,
including those from Python 1.x and 2.x.
- """
- import collections
- import inspect
- import sys
- from io import StringIO
- from linecache import getline
- from types import CodeType
- from typing import Iterable, Iterator, Optional, Tuple, Union
- from xdis.cross_dis import (
- format_code_info,
- get_code_object,
- instruction_size,
- op_has_argument,
- )
- from xdis.cross_types import UnicodeForPython3
- from xdis.instruction import Instruction
- from xdis.op_imports import get_opcode_module
- from xdis.opcodes.opcode_36 import format_CALL_FUNCTION, format_CALL_FUNCTION_EX
- from xdis.util import code2num, num2code
- from xdis.version_info import IS_PYPY
- VARIANT = "pypy" if IS_PYPY else None
def get_docstring(filename: str, line_number: int, doc_str: str) -> str:
    """Gather the text of a triple-double-quoted docstring for display.

    ``doc_str`` is the already-read first line of the docstring.
    Following source lines, starting at ``line_number`` of ``filename``,
    are fetched with linecache.getline(), stripped, and appended (each
    preceded by a literal backslash-n marker) until one of these holds:

    * the docstring is closed, i.e. an appended line ends with the
      triple-double-quote delimiter, or ``doc_str`` itself was already a
      complete one-line docstring;
    * the accumulated text is at least 80 characters long;
    * the source runs out (getline() returns an empty string).

    If the accumulated text is longer than 80 characters, its last seven
    characters are replaced by an ellipsis followed by a closing delimiter.

    Returns the accumulated text with a trailing newline.
    """
    delimiter = '"""'
    # A one-line docstring such as """text""" is already complete.  The
    # length test keeps a lone opening delimiter from counting as closed.
    is_closed = (
        len(doc_str) >= 2 * len(delimiter)
        and doc_str.startswith(delimiter)
        and doc_str.endswith(delimiter)
    )
    while not is_closed and len(doc_str) < 80:
        raw_line = getline(filename, line_number)
        if not raw_line:
            # getline() gives "" only past end of file or when the file
            # cannot be read; a blank source line comes back as "\n".
            break
        next_line = raw_line.strip()
        doc_str += "\\n" + next_line
        is_closed = next_line.endswith(delimiter)
        line_number += 1
    if len(doc_str) > 80:
        doc_str = doc_str[:-7] + '... """'
    return doc_str + "\n"
def get_jump_val(jump_arg: int, version: tuple) -> int:
    """Scale a jump operand according to the bytecode version.

    For versions whose first two components are at least (3, 10) the
    operand is doubled; for earlier versions it is returned unchanged.
    """
    if version[:2] >= (3, 10):
        return jump_arg * 2
    return jump_arg
def get_const_info(const_index, const_list):
    """Resolve a constant operand and build its display text.

    When ``const_list`` is not None the value is ``const_list[const_index]``;
    otherwise the index itself is used as the value.

    Returns a ``(value, text)`` pair.  ``text`` is normally ``repr(value)``,
    passed through prefer_double_quote() when the value is a str.
    """
    value = const_index if const_list is None else const_list[const_index]

    # Float values "nan" and "inf" have no literal spelling in Python, so
    # they are canonicalized as float('nan'), float('inf') and so on.
    if isinstance(value, float) and str(value) in ("nan", "-nan", "inf", "-inf"):
        return value, f"float('{value}')"

    text = repr(value)
    if isinstance(value, str):
        text = prefer_double_quote(text)
    return value, text
# For compatibility: the underscore-prefixed name is kept as an alias of
# get_const_info.
_get_const_info = get_const_info
def get_name_info(name_index, name_list):
    """Resolve a name operand and build its display text.

    If ``name_list`` is given and ``name_index`` falls inside it, the name
    found there is returned as both the value and the text.  Otherwise the
    index itself is the value and its repr() is the text.
    """
    # PyPy seems to "optimize" out constant names, so an index may point
    # past the end of the list; such an index is reported as-is.
    if name_list is None or name_index >= len(name_list):
        return name_index, repr(name_index)
    name = name_list[name_index]
    return name, name
def get_optype(opcode: int, opc) -> str:
    """Classify ``opcode`` by the first operand-kind set of ``opc`` containing it.

    The result is one of: compare, const, free, jabs, jrel, local, name,
    nargs, vargs, encoded_arg, or ?? when the opcode is in none of the sets.
    """
    # Checked in this order; an opcode's class is the first set it is in.
    # "vargs" has to come after "nargs" since some opcodes may be in both.
    ordered_classes = (
        ("COMPARE_OPS", "compare"),
        ("CONST_OPS", "const"),
        ("FREE_OPS", "free"),
        ("JABS_OPS", "jabs"),
        ("JREL_OPS", "jrel"),
        ("LOCAL_OPS", "local"),
        ("NAME_OPS", "name"),
        ("NARGS_OPS", "nargs"),
        ("VARGS_OPS", "vargs"),
        ("ENCODED_ARG_OPS", "encoded_arg"),
    )
    for set_name, label in ordered_classes:
        # Each set is looked up only when every earlier set missed.
        if opcode in getattr(opc, set_name):
            return label
    return "??"
# For compatibility: the underscore-prefixed name is kept as an alias of
# get_name_info.
_get_name_info = get_name_info
def offset2line(offset: int, linestarts):
    """Map a bytecode offset to a line number by binary search.

    ``linestarts`` is expected to be a list of (offset, line number) pairs
    in which both offsets and line numbers are in increasing order.

    Returns the line number of the entry whose offset is the closest one at
    or below ``offset``.  Returns 0 when ``linestarts`` is empty or when
    ``offset`` is smaller than the first offset listed.
    """
    count = len(linestarts)
    if count == 0 or offset < linestarts[0][0]:
        return 0

    lo, hi = 0, count - 1
    while lo <= hi:
        probe = (lo + hi + 1) // 2
        start = linestarts[probe][0]
        if start == offset:
            return linestarts[probe][1]
        if start > offset:
            hi = probe - 1
        else:
            lo = probe + 1

    # No exact match: hi now indexes the closest entry below the offset.
    return linestarts[hi][1]
def _parse_varint(iterator: Iterator[int]) -> int:
    """Decode one variable-length integer from an iterator of byte values.

    Each byte supplies six payload bits (mask 63), most significant group
    first; bit 64 set on a byte means another byte follows.  StopIteration
    from the iterator propagates to the caller.
    """
    byte = next(iterator)
    value = byte & 0x3F
    while byte & 0x40:
        byte = next(iterator)
        value = (value << 6) | (byte & 0x3F)
    return value


# One decoded exception-table row.
_ExceptionTableEntry = collections.namedtuple(
    "_ExceptionTableEntry", "start end target depth lasti"
)


def parse_exception_table(exception_table: bytes):
    """Decode an exception table into a list of _ExceptionTableEntry tuples.

    Every row is stored as four varints: start, length, target and a
    combined depth/lasti value.  Start, length and target are doubled to
    give ``start``, ``end`` (start plus length) and ``target``; the low
    bit of the combined value becomes the boolean ``lasti`` and the
    remaining bits ``depth``.

    Decoding stops when the input is exhausted; a row that is cut short is
    dropped.
    """
    stream = iter(exception_table)
    rows = []
    try:
        while True:
            first = _parse_varint(stream) * 2
            span = _parse_varint(stream) * 2
            handler = _parse_varint(stream) * 2
            depth_lasti = _parse_varint(stream)
            rows.append(
                _ExceptionTableEntry(
                    first,
                    first + span,
                    handler,
                    depth_lasti >> 1,
                    bool(depth_lasti & 1),
                )
            )
    except StopIteration:
        # Running out of bytes is the normal way out of the loop.
        return rows
def prefer_double_quote(string: str) -> str:
    """
    Return ``string`` re-quoted with double quotes when that is possible.

    ``string`` is expected to already be a repr()-like representation,
    i.e. its first and last characters are the quotes.  If the text between
    them contains no double quote, that text is returned wrapped in double
    quotes; otherwise ``string`` is returned unchanged.

    Python formatting now seems to prefer double quotes even though its
    repr() function typically prefers single quotes.  Using the form that
    Python source typically uses can make things easier on users of this,
    like decompilers.
    """
    inner = string[1:-1]
    if '"' in inner:
        return string
    return '"' + inner + '"'
def _merge_localsplus_names(varnames, cells):
    """Return ``varnames`` followed by each name in ``cells`` that is not in ``varnames``.

    Either argument may be None or empty, and ``varnames`` may be any
    sequence; the result is always a tuple.
    """
    local_names = tuple(varnames or ())
    known = set(local_names)
    return local_names + tuple(name for name in (cells or ()) if name not in known)


def get_logical_instruction_at_offset(
    bytecode,
    offset: int,
    opc,
    varnames=None,
    names=None,
    constants=None,
    cells=None,
    linestarts=None,
    line_offset=0,
    exception_entries=None,
    labels=None,
):
    """
    Yield the logical instruction of `bytecode` that starts at `offset`.

    This is a generator.  If the opcode at `offset` is EXTENDED_ARG, each
    EXTENDED_ARG instruction is yielded in turn, followed by the first
    non-EXTENDED_ARG instruction; that last instruction has its argument
    value adjusted to include the bits the EXTENDED_ARG prefixes supplied.
    Otherwise exactly one Instruction is yielded.

    `varnames`, `names`, `constants` and `cells` are the tables used to
    turn operands into values and display text; any of them may be None,
    in which case the raw index is reported.  `linestarts`, when given, is
    looked up with ``.get(offset)`` and `line_offset` is added to every
    line number found.  `labels` is the collection of jump-target offsets;
    when it is None it is computed here with ``opc.findlabels`` and the
    targets in `exception_entries` are added to it.
    """
    if labels is None:
        labels = opc.findlabels(bytecode, opc)
        # Exception-handler targets are merged in only when the labels are
        # built here; a caller passing `labels` has already included them.
        if exception_entries is not None:
            for _start, _end, target, _depth, _lasti in exception_entries:
                if target not in labels:
                    labels.append(target)

    # FIXME: We really need to distinguish 3.6.0a1 from 3.6.a3.
    # See below FIXME
    python_36 = opc.python_version >= (3, 6)

    starts_line = None

    n = len(bytecode)

    extended_arg_count = 0
    extended_arg = 0

    if hasattr(opc, "EXTENDED_ARG"):
        extended_arg_size = instruction_size(opc.EXTENDED_ARG, opc)
    else:
        extended_arg_size = 0

    # This is not necessarily true initially, but it gets us through the
    # loop below.
    last_op_was_extended_arg = True
    i = offset

    while i < n and last_op_was_extended_arg:
        op = code2num(bytecode, i)
        opname = opc.opname[op]
        optype = get_optype(op, opc)

        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i += 1

        arg = None
        argval = None
        argrepr = ""
        has_arg = op_has_argument(op, opc)
        if has_arg:
            if python_36:
                # One operand byte; bits carried over from a preceding
                # EXTENDED_ARG are OR-ed in.
                arg = code2num(bytecode, i) | extended_arg
                extended_arg = (arg << 8) if opname == "EXTENDED_ARG" else 0
                # FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
                i += 1
            else:
                # Two little-endian operand bytes plus any carried-over bits.
                arg = (
                    code2num(bytecode, i)
                    + code2num(bytecode, i + 1) * 0x100
                    + extended_arg
                )
                i += 2
                extended_arg = arg * 0x10000 if opname == "EXTENDED_ARG" else 0

            # Set argval to the dereferenced value of the argument when
            # available, and argrepr to the string representation of argval.
            # disassemble_bytes needs the string repr of the
            # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg

            if op in opc.CONST_OPS:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in opc.NAME_OPS:
                if opc.version_tuple >= (3, 11) and opname == "LOAD_GLOBAL":
                    argval, argrepr = _get_name_info(arg >> 1, names)
                    if arg & 1:
                        argrepr = "NULL + " + argrepr
                elif opc.version_tuple >= (3, 12) and opname == "LOAD_ATTR":
                    argval, argrepr = _get_name_info(arg >> 1, names)
                    if arg & 1:
                        argrepr = "NULL|self + " + argrepr
                elif opc.version_tuple >= (3, 12) and opname == "LOAD_SUPER_ATTR":
                    argval, argrepr = _get_name_info(arg >> 2, names)
                    if arg & 1:
                        argrepr = "NULL|self + " + argrepr
                else:
                    argval, argrepr = _get_name_info(arg, names)
            elif op in opc.JREL_OPS:
                signed_arg = -arg if "JUMP_BACKWARD" in opname else arg
                # i already points past this instruction's operand.
                argval = i + get_jump_val(signed_arg, opc.python_version)

                # check cache instructions for python 3.13
                if opc.version_tuple >= (3, 13) and opname in (
                    "POP_JUMP_IF_TRUE",
                    "POP_JUMP_IF_FALSE",
                    "POP_JUMP_IF_NONE",
                    "POP_JUMP_IF_NOT_NONE",
                    "JUMP_BACKWARD",
                ):
                    argval += 2

                # FOR_ITER has a cache instruction in 3.12
                if opc.version_tuple >= (3, 12) and opname == "FOR_ITER":
                    argval += 2

                argrepr = "to " + repr(argval)
            elif op in opc.JABS_OPS:
                argval = get_jump_val(arg, opc.python_version)
                argrepr = "to " + repr(argval)
            elif op in opc.LOCAL_OPS:
                if opc.version_tuple >= (3, 13) and opname in (
                    "LOAD_FAST_LOAD_FAST",
                    "STORE_FAST_LOAD_FAST",
                    "STORE_FAST_STORE_FAST",
                ):
                    # Two 4-bit local indexes packed into one operand.
                    localsplusnames = _merge_localsplus_names(varnames, cells)
                    arg1 = arg >> 4
                    arg2 = arg & 15
                    argval1, argrepr1 = _get_name_info(arg1, localsplusnames)
                    argval2, argrepr2 = _get_name_info(arg2, localsplusnames)
                    argval = argval1, argval2
                    argrepr = argrepr1 + ", " + argrepr2
                elif opc.version_tuple >= (3, 11):
                    localsplusnames = _merge_localsplus_names(varnames, cells)
                    argval, argrepr = _get_name_info(arg, localsplusnames)
                else:
                    argval, argrepr = _get_name_info(arg, varnames)
            elif op in opc.FREE_OPS:
                if opc.version_tuple >= (3, 11):
                    localsplusnames = _merge_localsplus_names(varnames, cells)
                    argval, argrepr = _get_name_info(arg, localsplusnames)
                else:
                    argval, argrepr = _get_name_info(arg, cells)
            elif op in opc.COMPARE_OPS:
                if opc.python_version >= (3, 13):
                    # The fifth-lowest bit of the oparg now indicates a forced conversion to bool.
                    argval = opc.cmp_op[arg >> 5]
                elif opc.python_version >= (3, 12):
                    argval = opc.cmp_op[arg >> 4]
                else:
                    argval = opc.cmp_op[arg]
                argrepr = argval
            elif op in opc.NARGS_OPS:
                if python_36 and opname in ("CALL_FUNCTION", "CALL_FUNCTION_EX"):
                    if opname == "CALL_FUNCTION":
                        argrepr = format_CALL_FUNCTION(code2num(bytecode, i - 1))
                    else:
                        argrepr = format_CALL_FUNCTION_EX(code2num(bytecode, i - 1))
                elif not (
                    python_36
                    or opname in ("RAISE_VARARGS", "DUP_TOPX", "MAKE_FUNCTION")
                ):
                    # Before 3.6 the two operand bytes are the positional
                    # and named argument counts.
                    argrepr = "%d positional, %d named" % (
                        code2num(bytecode, i - 2),
                        code2num(bytecode, i - 1),
                    )
            # A per-opcode formatter, when the opcode module supplies one,
            # overrides whatever text was chosen above.
            if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
                argrepr = opc.opcode_arg_fmt[opname](arg)
        else:
            if python_36:
                # From 3.6 on, an instruction without an argument still
                # occupies an operand byte.
                i += 1
            if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
                argrepr = opc.opcode_arg_fmt[opname](arg)

        inst_size = instruction_size(op, opc) + (extended_arg_count * extended_arg_size)
        start_offset = offset if opc.oppop[op] == 0 else None

        yield Instruction(
            is_jump_target=is_jump_target,
            starts_line=starts_line,
            offset=offset,
            opname=opname,
            opcode=op,
            has_arg=has_arg,
            arg=arg,
            argval=argval,
            argrepr=argrepr,
            tos_str=None,
            positions=None,
            optype=optype,
            inst_size=inst_size,
            has_extended_arg=extended_arg_count != 0,
            fallthrough=None,
            start_offset=start_offset,
        )
        # Keep looping only while EXTENDED_ARG prefixes are being consumed.
        last_op_was_extended_arg = opname == "EXTENDED_ARG"
        extended_arg_count = extended_arg_count + 1 if last_op_was_extended_arg else 0
    # end loop
def next_offset(op: int, opc, offset: int) -> int:
    """Return the offset just past the instruction with opcode `op` at `offset`.

    The instruction's length is obtained from instruction_size() for the
    bytecode version described by `opc`.
    """
    size = instruction_size(op, opc)
    return offset + size
def get_instructions_bytes(
    bytecode,
    opc,
    varnames=None,
    names=None,
    constants=None,
    cells=None,
    linestarts=None,
    line_offset=0,
    exception_entries=None,
):
    """
    Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    e.g., variable names, constants, can be specified using optional
    arguments.

    `line_offset` is added to every line number taken from `linestarts`.
    The target offset of each entry in `exception_entries`, when given, is
    treated as a jump target in addition to those found by
    ``opc.findlabels``.
    """
    # The label collection is built once for the whole pass.  A set keeps
    # the per-instruction "is this offset a jump target?" test cheap.
    labels = set(opc.findlabels(bytecode, opc))
    if exception_entries is not None:
        # Only the handler target of each entry is a label.
        for _start, _end, target, _depth, _lasti in exception_entries:
            labels.add(target)

    n = len(bytecode)
    offset = 0
    while offset < n:
        # Every pass yields one logical instruction: any EXTENDED_ARG
        # prefixes followed by the instruction they apply to.
        for instruction in get_logical_instruction_at_offset(
            bytecode,
            offset,
            opc,
            varnames=varnames,
            names=names,
            constants=constants,
            cells=cells,
            linestarts=linestarts,
            line_offset=line_offset,
            exception_entries=exception_entries,
            labels=labels,
        ):
            yield instruction
        # Continue right after the last instruction that was yielded.
        offset = next_offset(instruction.opcode, opc, instruction.offset)
class Bytecode:
    """Bytecode operations involving a Python code object.

    Instantiate this with a function, method, string of code, or a code object
    (as returned by compile()).

    Iterating over these yields the bytecode operations as Instruction instances.
    """

    def __init__(
        self, x, opc, first_line=None, current_offset=None, dup_lines: bool = True
    ) -> None:
        """Set up disassembly state for `x` using opcode module `opc`.

        `first_line`, when given, is the line number to report for the
        first source line; the difference from the code object's own first
        line is kept as a line offset.  `current_offset` is the offset of
        the instruction to mark as current in dis() output.  `dup_lines`
        is passed through to ``opc.findlinestarts``.
        """
        self.codeobj = co = get_code_object(x)
        self._line_offset = 0
        self._cell_names = ()
        # Set up front so the attribute exists for bytecode older than 1.5.
        self.first_line = first_line
        if opc.version_tuple >= (1, 5):
            if first_line is None:
                self.first_line = co.co_firstlineno
            else:
                self._line_offset = first_line - co.co_firstlineno
            if opc.version_tuple > (2, 0):
                self._cell_names = co.co_cellvars + co.co_freevars
        self._linestarts = dict(opc.findlinestarts(co, dup_lines=dup_lines))
        self._original_object = x
        self.opc = opc
        self.opnames = opc.opname
        self.current_offset = current_offset

        if (
            opc.version_tuple >= (3, 11)
            and not opc.is_pypy
            and hasattr(co, "co_exceptiontable")
        ):
            self.exception_entries = parse_exception_table(co.co_exceptiontable)
        else:
            self.exception_entries = None

    def __iter__(self):
        """Iterate over the Instruction objects of the wrapped code object."""
        co = self.codeobj
        return get_instructions_bytes(
            co.co_code,
            self.opc,
            co.co_varnames,
            co.co_names,
            co.co_consts,
            self._cell_names,
            self._linestarts,
            line_offset=self._line_offset,
            exception_entries=self.exception_entries,
        )

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._original_object!r})"

    @classmethod
    def from_traceback(cls, tb, opc=None):
        """Construct a Bytecode from the given traceback"""
        if opc is None:
            opc = get_opcode_module(sys.version_info, VARIANT)
        # Walk to the innermost frame of the traceback.
        while tb.tb_next:
            tb = tb.tb_next
        return cls(
            tb.tb_frame.f_code, opc=opc, first_line=None, current_offset=tb.tb_lasti
        )

    def info(self) -> str:
        """Return formatted information about the code object."""
        return format_code_info(self.codeobj, self.opc.version_tuple)

    def dis(self, asm_format: str = "classic", show_source: bool = False) -> str:
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj

        filename = co.co_filename

        # -1 matches no instruction offset, so nothing is marked current.
        offset = self.current_offset if self.current_offset is not None else -1

        output = StringIO()
        if self.opc.version_tuple > (2, 0):
            cells = self._cell_names
            line_starts = self._linestarts
        else:
            cells = None
            line_starts = None

        first_line_number = getattr(co, "co_firstlineno", None)

        if inspect.iscode(co):
            filename = inspect.getfile(co)

        if isinstance(filename, UnicodeForPython3):
            filename = str(filename)

        self.disassemble_bytes(
            co.co_code,
            varnames=co.co_varnames,
            names=co.co_names,
            constants=co.co_consts,
            cells=cells,
            line_starts=line_starts,
            line_offset=self._line_offset,
            file=output,
            lasti=offset,
            asm_format=asm_format,
            filename=filename,
            show_source=show_source,
            first_line_number=first_line_number,
            exception_entries=self.exception_entries,
        )
        return output.getvalue()

    def distb(self, tb=None) -> None:
        """Disassemble a traceback (default: last traceback).

        Raises RuntimeError when no traceback is given and there is no
        ``sys.last_traceback``.
        """
        if tb is None:
            try:
                tb = sys.last_traceback
            except AttributeError:
                raise RuntimeError("no last traceback to disassemble") from None
            while tb.tb_next:
                tb = tb.tb_next
        assert tb is not None
        # disassemble_bytes() works on the raw bytecode (it takes its
        # len()), so hand it the code object's pieces, not the code object.
        code = tb.tb_frame.f_code
        self.disassemble_bytes(
            code.co_code,
            tb.tb_lasti,
            varnames=code.co_varnames,
            names=code.co_names,
            constants=code.co_consts,
            cells=code.co_cellvars + code.co_freevars,
            line_starts=dict(self.opc.findlinestarts(code)),
        )

    def disassemble_bytes(
        self,
        bytecode: Union[CodeType, bytes, str],
        lasti: int = -1,
        varnames=None,
        names=None,
        constants=None,
        cells=None,
        line_starts=None,
        file=None,
        line_offset=0,
        asm_format="classic",
        filename: Optional[str] = None,
        show_source=True,
        first_line_number: Optional[int] = None,
        exception_entries=None,
    ) -> list:
        """Write a disassembly of `bytecode` to `file` and return the instructions.

        `file` defaults to the current ``sys.stdout``.  The instruction
        whose offset equals `lasti` is passed to Instruction.disassemble()
        as the current one.  Source text is interleaved only when
        `show_source` is set and line information, `first_line_number`
        and `filename` are all available.  In "asm" format EXTENDED_ARG
        instructions are not written; their line number and jump-target
        offset are carried over to the instruction that follows.  CACHE
        instructions are written only in the "bytes" and "extended_bytes"
        formats.

        The returned list holds every instruction that was not folded away,
        including CACHE instructions that were not written.
        """
        if file is None:
            # Resolved at call time so a redirected sys.stdout is honored.
            file = sys.stdout

        # Omit the line number column entirely if we have no line number info
        show_lineno = line_starts is not None or self.opc.version_tuple < (2, 3)
        show_source = show_source and show_lineno and first_line_number and filename

        def show_source_text(line_number: Optional[int]) -> None:
            """
            Show the Python source text if all conditions are right:
              * source text was requested - this implies other checks
                seen above
              * the source is available via linecache.getline()
            """
            # There is some redundancy in the condition below
            # to make type checking happy. In reality,
            # only the show_source is tested at runtime.
            if show_source and filename and line_number:
                source_text = getline(filename, line_number).lstrip()
                if source_text.startswith('"""'):
                    source_text = get_docstring(
                        filename, line_number + 1, source_text.rstrip()
                    )
                if source_text:
                    file.write(" " * 13 + "# " + source_text)

        show_source_text(first_line_number)

        # Old Python's use "SET_LINENO" to set a line number
        set_lineno_number = 0
        last_was_set_lineno = False

        # TODO?: Adjust width upwards if max(line_starts.values()) >= 1000?
        lineno_width = 3 if show_lineno else 0

        instructions = []
        extended_arg_starts_line: Optional[int] = None
        extended_arg_jump_target_offset: Optional[int] = None

        for instr in get_instructions_bytes(
            bytecode,
            self.opc,
            varnames,
            names,
            constants,
            cells,
            line_starts,
            line_offset=line_offset,
            exception_entries=exception_entries,
        ):
            # Python 1.x into early 2.0 uses SET_LINENO
            if last_was_set_lineno:
                instr = Instruction(
                    opcode=instr.opcode,
                    opname=instr.opname,
                    arg=instr.arg,
                    argval=instr.argval,
                    argrepr=instr.argrepr,
                    offset=instr.offset,
                    starts_line=set_lineno_number,  # line set by SET_LINENO
                    is_jump_target=instr.is_jump_target,
                    positions=None,
                    optype=instr.optype,
                    has_arg=instr.has_arg,
                    inst_size=instr.inst_size,
                    has_extended_arg=instr.has_extended_arg,
                    tos_str=None,
                    start_offset=None,
                )
                last_was_set_lineno = False

            if instr.opname == "SET_LINENO":
                set_lineno_number = instr.argval
                last_was_set_lineno = True

            if instr.opname == "EXTENDED_ARG" and asm_format == "asm":
                # The EXTENDED_ARG line itself is not written in "asm"
                # format.  Remember the line number and, if it is a jump
                # target, the offset of the first EXTENDED_ARG of the run
                # so the instruction that follows can take them over.
                if extended_arg_starts_line is None:
                    extended_arg_starts_line = instr.starts_line
                if instr.is_jump_target and extended_arg_jump_target_offset is None:
                    extended_arg_jump_target_offset = instr.offset
                continue

            if extended_arg_starts_line:
                instr = Instruction(
                    opcode=instr.opcode,
                    opname=instr.opname,
                    arg=instr.arg,
                    argval=instr.argval,
                    argrepr=instr.argrepr,
                    offset=instr.offset,
                    starts_line=extended_arg_starts_line,  # this is the only field that changes
                    is_jump_target=instr.is_jump_target,
                    positions=instr.positions,
                    optype=instr.optype,
                    has_arg=instr.has_arg,
                    inst_size=instr.inst_size,
                    has_extended_arg=instr.has_extended_arg,
                    tos_str=instr.tos_str,
                    fallthrough=instr.fallthrough,
                    start_offset=instr.start_offset,
                )
            extended_arg_starts_line = None

            if extended_arg_jump_target_offset is not None:
                instr = Instruction(
                    opcode=instr.opcode,
                    opname=instr.opname,
                    arg=instr.arg,
                    argval=instr.argval,
                    argrepr=instr.argrepr,
                    # The label that pointed at the folded EXTENDED_ARG
                    # moves to this instruction.
                    offset=extended_arg_jump_target_offset,
                    # Keep the line number, including one just taken over
                    # from the EXTENDED_ARG above.
                    starts_line=instr.starts_line,
                    is_jump_target=True,
                    positions=instr.positions,
                    optype=instr.optype,
                    has_arg=instr.has_arg,
                    inst_size=instr.inst_size,
                    has_extended_arg=instr.has_extended_arg,
                    tos_str=instr.tos_str,
                    fallthrough=instr.fallthrough,
                    start_offset=instr.start_offset,
                )
                extended_arg_jump_target_offset = None

            instructions.append(instr)

            # The source line of the instruction at offset 0 was already
            # shown before the loop.
            new_source_line = (
                show_lineno and instr.starts_line is not None and instr.offset > 0
            )
            if new_source_line:
                file.write("\n")
                show_source_text(instr.starts_line)

            is_current_instr = instr.offset == lasti

            # Python 3.11 introduces "CACHE" and the convention seems to be
            # to not print these normally.
            if instr.opname == "CACHE" and asm_format not in (
                "extended_bytes",
                "bytes",
            ):
                continue

            file.write(
                instr.disassemble(
                    self.opc,
                    line_starts,
                    lineno_width,
                    is_current_instr,
                    asm_format,
                    instructions,
                )
                + "\n"
            )
            # Python bytecode before 1.4 has a RESERVE_FAST instruction that
            # store STORE_FAST and LOAD_FAST instructions in a different area
            # currently we can't track names in this area, but instead use
            # locals and hope the two are the same.
            if instr.opname == "RESERVE_FAST":
                file.write(
                    "# Warning: subsequent LOAD_FAST and STORE_FAST after RESERVE_FAST "
                    "are inaccurate here in Python before 1.5\n"
                )
        return instructions

    def get_instructions(self, x, first_line=None):
        """Iterator for the opcodes in methods, functions or code

        Generates a series of Instruction named tuples giving the details of
        each operation in the supplied code.

        If *first_line* is not None, it indicates the line number that should
        be reported for the first source line in the disassembled code.
        Otherwise, the source line information (if any) is taken directly from
        the disassembled code object.
        """
        co = get_code_object(x)
        cell_names = co.co_cellvars + co.co_freevars
        line_starts = dict(self.opc.findlinestarts(co))
        if first_line is not None:
            line_offset = first_line - co.co_firstlineno
        else:
            line_offset = 0
        return get_instructions_bytes(
            co.co_code,
            self.opc,
            co.co_varnames,
            co.co_names,
            co.co_consts,
            cell_names,
            line_starts,
            line_offset,
        )
def list2bytecode(inst_list: Iterable, opc, varnames: str, consts: Tuple[None, int]) -> bytes:
    """Convert a list/tuple of instruction lists/tuples to bytecode.

    Each item of ``inst_list`` is ``(opname, operand, ...)``.  The opcode
    for ``opname`` comes from ``opc.opmap``.  Every operand is replaced by
    its position in ``consts`` when the opcode is in ``opc.CONST_OPS``, and
    by its position in ``varnames`` otherwise; that position is encoded
    with num2code().

    Raises TypeError when ``opname`` is not in ``opc.opname`` or when an
    operand is not found in the table it is looked up in.
    """
    bc = []
    for i, opcodes in enumerate(inst_list):
        opname = opcodes[0]
        operands = opcodes[1:]
        if opname not in opc.opname:
            raise TypeError(
                "error at item %d [%s, %s], opcode not valid" % (i, opname, operands)
            )
        opcode = opc.opmap[opname]
        bc.append(opcode)
        table = consts if opcode in opc.CONST_OPS else varnames
        for operand in operands:
            try:
                k = table.index(operand)
            except ValueError:
                # .index() reports a missing item by raising; it never
                # returns -1.
                raise TypeError(
                    f"operand {i} [{opname}, {operands}], not found in names"
                ) from None
            bc += num2code(k)
    return bytes(bc)
if __name__ == "__main__":
    import xdis.opcodes.opcode_27 as opcode_27
    import xdis.opcodes.opcode_34 as opcode_34
    import xdis.opcodes.opcode_36 as opcode_36
    from xdis.version_info import PYTHON3

    # Small demo: assemble the same four instructions for three bytecode
    # versions and print each result after the running interpreter's own
    # bytecode for an equivalent function.
    my_constants = (None, 2)
    var_names = "a"
    instructions = [
        ("LOAD_CONST", 2),
        ("STORE_FAST", "a"),
        ("LOAD_FAST", "a"),
        ("RETURN_VALUE",),
    ]

    def f() -> int:
        a = 2
        return a

    native_code = f.__code__ if PYTHON3 else f.func_code
    print(native_code.co_code)

    for opcode_module in (opcode_27, opcode_34, opcode_36):
        bc = list2bytecode(instructions, opcode_module, var_names, my_constants)
        print(bc)
|