| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- # Copyright (c) 2018-2024 by Rocky Bernstein
- #
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- """Python instruction class
- Extracted from Python 3's ``dis`` module but generalized to
- allow running on Python 2.
- """
- import re
- from typing import Any, Dict, NamedTuple, Optional, Union
- # _Instruction.tos_str.__doc__ = (
- # "If not None, a string representation of the top of the stack (TOS)"
- # )
- # # Python expressions can be straight-line, operator like-basic block code that take
- # # items off a stack and push a value onto the stack. In this case, in a linear scan
- # # we can basically build up an expression tree.
- # # Note this has to be the last field. Code to set this assumes this.
- # _Instruction.start_offset.__doc__ = (
- # "If not None, the offset of the first instruction feeding into the operation"
- # )
- _OPNAME_WIDTH = 20
- class AssembleFormat(NamedTuple):
- """
- A structure to hold the essential information
- that would be shown in a line of assembly under any
- formatting option, e.g. extended-bytes, or classic.
- Fields in the order they in which they are defined in constructing an object:
- is_jump_target: True if other code jumps to here,
- 'loop' if this is a loop beginning, which
- in Python can be determined jump to an earlier offset.
- Otherwise, False.
- is_current_instruction: True if we are stopped at this instruction.
- starts_line: Optional Line started by this opcode (if any). Otherwise None.
- offset: Start index of operation within bytecode sequence.
- opname: human-readable name for operation.
- opcode: numeric code for operation.
- has_arg: True if opcode takes an argument. In that case,
- ``argepr`` will have that value. False
- if this opcode doesn't take an argument. When False,
- don't look at ``argval`` or ``argrepr``.
- arg: Optional numeric argument to operation (if any). Otherwise, None.
- argrepr: human-readable description of operation argument.
- tos_str: If not None, a string representation of the top of the stack (TOS).
- This is obtained by scanning previous instructions and
- using information there and in their ``tos_str`` fields.
- """
- # True if other code jumps to here, the string "loop" if this is a loop
- # beginning, which in Python can be determined jump to an earlier
- # offset. Otherwise, False.
- # Note that this is a generalization of Python's "is_jump_target".
- is_jump_target: Union[bool, str]
- is_current_instruction: bool
- starts_line: Optional[int]
- # Offset of the instruction
- offset: int
- # Human readable name for operation
- opname: str
- # Numeric code for operation
- opcode: int
- # True if instruction has an operand, otherwise False.
- has_arg: bool
- # Numeric operand value if operation has an operand. Otherwise, None.
- # This operand value is an index into one of the lists of a code type.
- # The exact table indexed depends on optype.
- arg: Optional[int]
- # String representation of argval if argval is not None.
- argrepr: Optional[str]
- # If not None, a string representation of the top of the stack (TOS)
- tos_str: Optional[str] = None
- class Instruction(NamedTuple):
- """Details for a bytecode operation
- The order of the fields below follows roughly how the values might be displayed
- in an assembly listing.
- is_jump_target: True if other code jumps to here,
- 'loop' if this is a loop beginning, which
- in Python can be determined jump to an earlier offset.
- Otherwise, False.
- starts_line: Optional Line started by this opcode (if any). Otherwise None.
- offset: Start index of operation within bytecode sequence.
- opname: human-readable name for operation.
- opcode: numeric code for operation.
- has_arg: True if opcode takes an argument. In that case,
- ``argval`` and ``argepr`` will have that value. False
- if this opcode doesn't take an argument. When False,
- don't look at ``argval`` or ``argrepr``.
- arg: Optional numeric argument to operation (if any). Otherwise, None.
- argval: resolved arg value (if known). Otherwise, the same as ``arg``.
- argrepr: human-readable description of operation argument.
- positions: Optional dis.Positions object holding the start and end locations that
- are covered by this instruction. This not implemented yet.
- optype: Opcode classification. One of:
- "compare", "const", "free", "jabs", "jrel", "local",
- "name", or "nargs".
- inst_size: number of bytes the instruction occupies
- has_extended_arg: True if the instruction was built from EXTENDED_ARG
- opcodes.
- fallthrough: True if the instruction can (not must) fall through to the next
- instruction. Note conditionals are in this category, but
- returns, raise, and unconditional jumps are not.
- Note: the following fields have to appear in the order below and be at the end.
- disassembly may replace (delete and insert) an instruction, and it assumes
- the ending fields are as follows:
- tos_str: If not None, a string representation of the top of the stack (TOS).
- This is obtained by scanning previous instructions and
- using information there and in their ``tos_str`` fields.
- start_offset: if not None the instruction with the lowest offset that
- pushes a stack entry that is consume by this opcode
- """
- # Numeric code for operation
- opcode: int
- # Human readable name for operation
- opname: str
- # Numeric operand value if operation has an operand, otherwise None.
- # This operand value is an index into one of the lists of a code type.
- # The exact table indexed depends on optype.
- arg: Optional[int]
- # Resolved operand value (if known). This is obtained indexing the appropriate list
- # indicated by optype using value arg.
- # If for some reason we can't extract a value this way ``argval`` has value as
- # ``arg``.
- argval: Any
- # String representation of argval if argval is not None.
- argrepr: Optional[str]
- # Offset of the instruction
- offset: int
- starts_line: Optional[int]
- # True if other code jumps to here, the string "loop" if this is a loop
- # beginning, which in Python can be determined jump to an earlier
- # offset. Otherwise, False.
- # Note that this is a generalization of Python's "is_jump_target".
- is_jump_target: Union[bool, str]
- # dis.Positions object holding the start and end locations that
- # are covered by this instruction.
- # FIXME: Implement. The below is just a placeholder.
- #
- positions: Optional[Any]
- # The following values are our own extended information not found (yet) #
- # in Python's Instruction structure. #
- # First, values which can be computed or derived from the above,
- # along with an opcode structure. These We add these in an
- # instruction to make the instruction self sufficient.
- # opcode classification. One of:
- # compare, const, free, jabs, jrel, local, name, nargs
- optype: str
- # True if instruction has an operand, otherwise False.
- has_arg: bool
- # The number of bytes this instruction consumes.
- inst_size: int
- # True there were EXTENDED_ARG opcodes before this, otherwise False
- has_extended_arg: Optional[bool] = None
- # True if the instruction can (not must) fall through to the next
- # instruction. Note conditionals are in this category, but
- # returns, raise, and unconditional jumps are not.
- fallthrough: Optional[bool] = None
- # If not None, a string representation of the top of the stack (TOS)
- tos_str: Optional[str] = None
- # Python expressions can be straight-line, operator like-basic block code that take
- # items off a stack and push a value onto the stack. In this case, in a linear scan
- # we can basically build up an expression tree.
- # Note this has to be the last field. Code to set this assumes this.
- start_offset: Optional[int] = None
- def disassemble(
- self,
- opc,
- line_starts: Optional[Dict[int, int]] = None,
- lineno_width: int=3,
- mark_as_current: bool=False,
- asm_format: str="classic",
- instructions=[],
- ) -> str:
- """
- Format instruction details for inclusion in disassembly output.
- ``line_starts`` when it exists is a dictionary mapping a bytecode offsets to
- line numbers.
- ``lineno_width`` sets the width of the line number field (0 omits it)
- ``mark_as_current`` inserts a '-->' marker arrow as part of the line.
- """
- fields = []
- indexed_operand = frozenset(["name", "local", "compare", "free"])
- opcode = self.opcode
- # Column: Source code line number
- if lineno_width:
- if self.starts_line is not None:
- if asm_format == "asm":
- lineno_fmt = "%%%dd:\n" % lineno_width
- fields.append(lineno_fmt % self.starts_line)
- fields.append(" " * lineno_width)
- if self.is_jump_target:
- fields.append(" " * (lineno_width - 1))
- else:
- lineno_fmt = "%%%dd:" % lineno_width
- fields.append(lineno_fmt % self.starts_line)
- else:
- fields.append(" " * (lineno_width + 1))
- # Column: Current instruction indicator
- if mark_as_current and asm_format != "asm":
- fields.append("-->")
- else:
- fields.append(" ")
- # Column: Jump target marker
- if self.is_jump_target:
- if asm_format != "asm":
- fields.append(">>")
- else:
- fields = ["L%d:\n" % self.offset] + fields
- if not self.starts_line:
- fields.append(" ")
- else:
- fields.append(" ")
- # Column: Instruction offset from start of code sequence
- if asm_format != "asm":
- fields.append(repr(self.offset).rjust(4))
- # Column: Instruction bytes
- if asm_format in ("extended-bytes", "bytes"):
- hex_bytecode = "|%02x" % opcode
- if self.inst_size == 1:
- # Not 3.6 or later
- hex_bytecode += " " * (2 * 3)
- if self.inst_size == 2:
- # Must be Python 3.6 or later
- if self.has_arg and self.arg is not None:
- hex_bytecode += " %02x" % (self.arg % 256)
- else:
- hex_bytecode += " 00"
- elif self.inst_size == 3 and self.arg is not None:
- # Not 3.6 or later
- hex_bytecode += " %02x %02x" % divmod(self.arg, 256)
- fields.append(hex_bytecode + "|")
- # Column: Opcode name
- fields.append(self.opname.ljust(_OPNAME_WIDTH))
- # Column: Opcode argument
- if self.arg is not None:
- argrepr = self.argrepr
- # The ``argrepr`` value when the instruction was created
- # generally has all the information we require. However,
- # for "asm" format, want additional explicit information
- # linking operands to tables.
- if asm_format == "asm":
- if self.is_jump() and self.argrepr is not None:
- assert self.argrepr.startswith("to ")
- jump_target = self.argrepr[len("to ") :]
- fields.append("L" + jump_target)
- elif self.optype in indexed_operand:
- fields.append(repr(self.arg))
- fields.append("(%s)" % argrepr)
- argrepr = None
- elif (
- self.optype == "const"
- and argrepr is not None
- and not re.search(r"\s", argrepr)
- ):
- fields.append(repr(self.arg))
- fields.append("(%s)" % argrepr)
- argrepr = None
- else:
- fields.append(repr(self.arg))
- elif asm_format in ("extended", "extended-bytes"):
- if (
- self.is_jump()
- and line_starts is not None
- and line_starts.get(self.argval) is not None
- ):
- new_instruction = list(self)
- new_instruction[-2] = f"To line {line_starts[self.argval]}"
- # Here and below we use self.__class__ instead of Instruction
- # so that other kinds of compatible namedtuple Instructions
- # can be used. In particular, the control-flow project
- # defines such an ExtendedInstruction namedtuple
- self = self.__class__(*new_instruction)
- del instructions[-1]
- instructions.append(self)
- elif (
- hasattr(opc, "opcode_extended_fmt")
- and self.opname in opc.opcode_extended_fmt
- ):
- new_repr = opc.opcode_extended_fmt.get(self.opname, lambda opc, instr: None)(
- opc, list(reversed(instructions))
- )
- start_offset = None
- if isinstance(new_repr, tuple) and len(new_repr) == 2:
- new_repr, start_offset = new_repr
- if new_repr:
- # Add tos_str info to tos_str field of instruction.
- # This the last field in instruction.
- new_instruction = list(self)
- new_instruction[-1] = start_offset
- new_instruction[-2] = new_repr
- del instructions[-1]
- # See comment above abut the use of self.__class__
- self = self.__class__(*new_instruction)
- instructions.append(self)
- argrepr = new_repr
- elif opcode in opc.nullaryloadop:
- new_instruction = list(self)
- new_instruction[-2] = self.argrepr
- start_offset = new_instruction[-1] = self.offset
- del instructions[-1]
- # See comment above abut the use of self.__class__
- self = self.__class__(*new_instruction)
- instructions.append(self)
- pass
- if not argrepr:
- if asm_format != "asm" or self.opname == "MAKE_FUNCTION":
- fields.append(repr(self.arg))
- pass
- else:
- # Column: Opcode argument details
- if len(instructions) > 0:
- argval = self.argval
- if self.tos_str is None:
- fields.append(f"({self.argrepr})")
- else:
- if self.optype in ("vargs", "encoded_arg"):
- prefix = f"{self.argval} ; "
- elif self.argrepr is None:
- prefix = ""
- else:
- prefix = f"({self.argrepr}) ; "
- if opcode in opc.operator_set | opc.callop:
- prefix += "TOS = "
- fields.append(f"{prefix}{self.tos_str}")
- pass
- elif self.argrepr is not None:
- fields.append(self.argrepr)
- pass
- pass
- elif asm_format in ("extended", "extended-bytes"):
- if (
- hasattr(opc, "opcode_extended_fmt")
- and self.opname in opc.opcode_extended_fmt
- ):
- new_repr, start_offset = opc.opcode_extended_fmt.get(self.opname, (None, 0))(
- opc, list(reversed(instructions))
- )
- if new_repr:
- new_instruction = list(self)
- new_instruction[-2] = new_repr
- new_instruction[-1] = start_offset
- del instructions[-1]
- # See comment above abut the use of self.__class__
- instructions.append(self.__class__(*new_instruction))
- argval = self.argval
- prefix = "" if argval is None else f"({argval}) | "
- if self.opcode in opc.operator_set:
- prefix += "TOS = "
- fields.append(f"{prefix}{new_repr}")
- pass
- elif (
- hasattr(opc, "opcode_arg_fmt")
- and self.opname in opc.opcode_arg_fmt
- ) and self.argrepr is not None:
- fields.append(self.argrepr)
- pass
- pass
- return " ".join(fields).rstrip()
- def format_to_assembly_line(
- self,
- current_instruction_line,
- ) -> AssembleFormat:
- """
- Format instruction into a structure that can be easily
- turned a structure contains the essential information
- that would be shown as a line in an assembly listing
- """
- return AssembleFormat(
- self.is_jump_target,
- current_instruction_line == self.starts_line,
- self.starts_line,
- self.offset,
- self.opname,
- self.opcode,
- self.has_arg,
- self.arg,
- self.argrepr,
- self.tos_str,
- )
- def is_jump(self) -> bool:
- """
- Return True if instruction is some sort of jump.
- """
- return self.optype in ("jabs", "jrel")
- def jumps_forward(self) -> bool:
- """
- Return True if instruction is jump backwards
- """
- return self.is_jump() and self.offset < self.argval
- # if __name__ == '__main__':
- # pass
|