| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470 |
- # Copyright (c) 2015-2024 by Rocky Bernstein
- # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
- # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Python 2 generic bytecode scanner/deparser.

This overlaps with Python 3's dis module, but it can be run from
Python versions other than the version running this code. Notably,
it can be run from Python version 2.

We also *modify* the instruction sequence to assist deparsing code.
For example:
 - we add "COME_FROM" instructions to help in figuring out
   conditional branching and looping.
 - LOAD_CONSTs are classified further into the type of thing
   they load: lambdas, genexprs, and {dict,set,list} comprehensions.
 - PARAMETER counts are appended to {CALL,MAKE}_FUNCTION and
   BUILD_{TUPLE,SET,SLICE}.

Finally, we save token information.
"""
- from __future__ import print_function
- from copy import copy
- from sys import intern
- from xdis import code2num, instruction_size, iscode, op_has_argument
- from xdis.bytecode import _get_const_info
- from uncompyle6.scanner import Scanner, Token
- class Scanner2(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
    """Build the opcode classification tables shared by the Python 2 scanners.

    ``version``, ``show_asm`` and ``is_pypy`` are passed unchanged to
    ``Scanner.__init__``.  That call has to come first: it is what
    initializes ``self.opc``, which every table below reads from.
    """
    Scanner.__init__(self, version, show_asm, is_pypy)
    opc = self.opc

    self.pop_jump_if = frozenset((opc.PJIF, opc.PJIT))
    self.jump_forward = frozenset((opc.JUMP_ABSOLUTE, opc.JUMP_FORWARD))

    # This is the 2.5+ default.
    # For <2.5 it is <generator expression>
    self.genexpr_name = "<genexpr>"
    self.load_asserts = set()

    # Opcodes that can begin a new statement: the SETUP_* block openers
    # (which we will COME_FROM with these names), some block and END_
    # instructions, and the store/delete/print/return family.
    self.statement_opcodes = frozenset(
        (
            opc.SETUP_LOOP,
            opc.BREAK_LOOP,
            opc.SETUP_FINALLY,
            opc.END_FINALLY,
            opc.SETUP_EXCEPT,
            opc.POP_BLOCK,
            opc.STORE_FAST,
            opc.DELETE_FAST,
            opc.STORE_DEREF,
            opc.STORE_GLOBAL,
            opc.DELETE_GLOBAL,
            opc.STORE_NAME,
            opc.DELETE_NAME,
            opc.STORE_ATTR,
            opc.DELETE_ATTR,
            opc.STORE_SUBSCR,
            opc.DELETE_SUBSCR,
            opc.RETURN_VALUE,
            opc.RAISE_VARARGS,
            opc.POP_TOP,
            opc.PRINT_EXPR,
            opc.PRINT_ITEM,
            opc.PRINT_NEWLINE,
            opc.PRINT_ITEM_TO,
            opc.PRINT_NEWLINE_TO,
            opc.CONTINUE_LOOP,
            opc.JUMP_ABSOLUTE,
            opc.EXEC_STMT,
        )
    )

    # Opcodes that can start a "store" non-terminal.
    # FIXME: JUMP_ABSOLUTE is weird. What's up with that?
    self.designator_ops = frozenset(
        (
            opc.STORE_FAST,
            opc.STORE_NAME,
            opc.STORE_GLOBAL,
            opc.STORE_DEREF,
            opc.STORE_ATTR,
            opc.STORE_SLICE_0,
            opc.STORE_SLICE_1,
            opc.STORE_SLICE_2,
            opc.STORE_SLICE_3,
            opc.STORE_SUBSCR,
            opc.UNPACK_SEQUENCE,
            opc.JUMP_ABSOLUTE,
        )
    )

    # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but < 2.7 doesn't.
    # Start with an empty set so later processing can be uniform.
    self.pop_jump_if_or_pop = frozenset()

    # Opcodes which expect a variable number of pushed values whose
    # count is in the operand. For parsing we generally change the
    # opcode name to include that number.
    self.varargs_ops = frozenset(
        (
            opc.BUILD_LIST,
            opc.BUILD_TUPLE,
            opc.BUILD_SLICE,
            opc.UNPACK_SEQUENCE,
            opc.MAKE_FUNCTION,
            opc.CALL_FUNCTION,
            opc.MAKE_CLOSURE,
            opc.CALL_FUNCTION_VAR,
            opc.CALL_FUNCTION_KW,
            opc.CALL_FUNCTION_VAR_KW,
            opc.DUP_TOPX,
            opc.RAISE_VARARGS,
        )
    )
- @staticmethod
- def extended_arg_val(arg):
- """Return integer value of an EXTENDED_ARG operand.
- In Python2 this always the operand value shifted 16 bits since
- the operand is always 2 bytes. In Python 3.6+ this changes to one byte.
- """
- return arg << 16
- @staticmethod
- def unmangle_name(name, classname):
- """Remove __ from the end of _name_ if it starts with __classname__
- return the "unmangled" name.
- """
- if name.startswith(classname) and name[-2:] != "__":
- return name[len(classname) - 2 :]
- return name
- @classmethod
- def unmangle_code_names(self, co, classname):
- """Remove __ from the end of _name_ if it starts with __classname__
- return the "unmangled" name.
- """
- if classname:
- classname = "_" + classname.lstrip("_") + "__"
- if hasattr(co, "co_cellvars"):
- free = [
- self.unmangle_name(name, classname)
- for name in (co.co_cellvars + co.co_freevars)
- ]
- else:
- free = ()
- names = [self.unmangle_name(name, classname) for name in co.co_names]
- varnames = [self.unmangle_name(name, classname) for name in co.co_varnames]
- else:
- if hasattr(co, "co_cellvars"):
- free = co.co_cellvars + co.co_freevars
- else:
- free = ()
- names = co.co_names
- varnames = co.co_varnames
- return free, names, varnames
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
    """
    Create "tokens" from the bytecode of a Python code object. Largely these
    are the opcode name, but in some cases that has been modified to make parsing
    easier.

    Returns a tuple ``(new_tokens, customize)``: a list of uncompyle6 Tokens
    and a dict mapping some of the generated token names to their operand.

    Parameters:
      co           -- the code object to tokenize.
      classname    -- if given, used to unmangle private names found in ``co``
                      (see ``unmangle_code_names``).
      code_objects -- not referenced anywhere in this method body.
      show_asm     -- "before", "after" or "both" to print the disassembly
                      and/or the resulting tokens; when falsy, ``self.show_asm``
                      is used instead.

    Some transformations are made to assist the deparsing grammar:
       - various types of LOAD_CONSTs are categorized in terms of what they load
       - COME_FROM instructions are added to assist parsing control structures
       - operands with stack argument counts or flag masks are appended to the
         opcode name, e.g.:
          * BUILD_LIST, BUILD_SET
          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
            arguments
       - EXTENDED_ARG instructions are removed

    Also, when we encounter certain tokens, we add them to a set which will cause custom
    grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
    cause specific rules for the specific number of arguments they take.
    """

    if not show_asm:
        show_asm = self.show_asm

    bytecode = self.build_instructions(co)

    if show_asm in ("both", "before"):
        print("\n# ---- disassembly:")
        bytecode.disassemble_bytes(
            co.co_code,
            varnames=co.co_varnames,
            names=co.co_names,
            constants=co.co_consts,
            cells=bytecode._cell_names,
            line_starts=bytecode._linestarts,
            asm_format="extended",
        )

    # list of tokens/instructions
    new_tokens = []

    # "customize" is in the process of going away here
    customize = {}

    if self.is_pypy:
        customize["PyPy"] = 0

    codelen = len(self.code)

    free, names, varnames = self.unmangle_code_names(co, classname)
    self.names = names

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
    # 'LOAD_ASSERT' is used in assert statements.
    self.load_asserts = set()
    for i in self.op_range(0, codelen):
        # We need to detect the difference between:
        #   raise AssertionError
        #  and
        #   assert ...
        # Below we use the heuristic that an "assert" is preceded by a POP_JUMP.
        # However we could also use followed by RAISE_VARARGS
        # or for PyPy there may be a JUMP_IF_NOT_DEBUG before.
        # FIXME: remove uses of PJIF, and PJIT
        if self.is_pypy:
            have_pop_jump = self.code[i] in (self.opc.PJIF, self.opc.PJIT)
        else:
            have_pop_jump = self.code[i] == self.opc.PJIT

        # i + 3 is the offset right after the 3-byte conditional jump.
        if have_pop_jump and self.code[i + 3] == self.opc.LOAD_GLOBAL:
            if names[self.get_argument(i + 3)] == "AssertionError":
                self.load_asserts.add(i + 3)

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    # self.load_asserts is saved and restored around the call.
    # NOTE(review): presumably find_jump_targets() may reset or modify
    # it -- confirm against that method.
    load_asserts_save = copy(self.load_asserts)
    jump_targets = self.find_jump_targets(show_asm)
    self.load_asserts = load_asserts_save
    # print("XXX2", jump_targets)

    # Walk the statement starts recorded in self.next_stmt and collect, in
    # "replace", the offsets whose token name has to be swapped for a
    # *_CONT variant.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < codelen - 1:
        if self.lines[last_stmt].next > i:
            # Distinguish "print ..." from "print ...,"
            if self.code[last_stmt] == self.opc.PRINT_ITEM:
                if self.code[i] == self.opc.PRINT_ITEM:
                    replace[i] = "PRINT_ITEM_CONT"
                elif self.code[i] == self.opc.PRINT_NEWLINE:
                    replace[i] = "PRINT_NEWLINE_CONT"
        last_stmt = i
        i = self.next_stmt[i]

    # Accumulates the value contributed by a preceding EXTENDED_ARG; it is
    # added to the next instruction's operand and then cleared.
    extended_arg = 0
    for offset in self.op_range(0, codelen):
        if offset in jump_targets:
            jump_idx = 0
            # We want to process COME_FROMs to the same offset to be in *descending*
            # offset order so we have the larger range or biggest instruction interval
            # last. (I think they are sorted in increasing order, but for safety
            # we sort them). That way, specific COME_FROM tags will match up
            # properly. For example, a "loop" with an "if" nested in it should have the
            # "loop" tag last so the grammar rule matches that properly.
            for jump_offset in sorted(jump_targets[offset], reverse=True):
                # if jump_offset == last_offset:
                #     continue
                # last_offset = jump_offset
                come_from_name = "COME_FROM"
                op_name = self.opname_for_offset(jump_offset)
                if op_name.startswith("SETUP_") and self.version[:2] == (2, 7):
                    come_from_type = op_name[len("SETUP_") :]
                    if come_from_type not in ("LOOP", "EXCEPT"):
                        come_from_name = "COME_FROM_%s" % come_from_type
                    pass
                # The synthetic token's offset is "<target offset>_<n>", where
                # n counts the COME_FROMs emitted for this target.
                new_tokens.append(
                    Token(
                        come_from_name,
                        jump_offset,
                        repr(jump_offset),
                        offset="%s_%d" % (offset, jump_idx),
                        has_arg=True,
                    )
                )
                jump_idx += 1
                pass

        op = self.code[offset]
        op_name = self.op_name(op)

        oparg = None
        pattr = None
        has_arg = op_has_argument(op, self.opc)
        if has_arg:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == self.opc.EXTENDED_ARG:
                # No token is emitted for EXTENDED_ARG itself; its value is
                # carried over to the following instruction.
                extended_arg += self.extended_arg_val(oparg)
                continue

            # Note: name used to match on rather than op since
            # BUILD_SET isn't in earlier Pythons.
            if op_name in (
                "BUILD_LIST",
                "BUILD_SET",
            ):
                t = Token(
                    op_name,
                    oparg,
                    pattr,
                    offset,
                    self.linestarts.get(offset, None),
                    op,
                    has_arg,
                    self.opc,
                )

                collection_type = op_name.split("_")[1]
                next_tokens = self.bound_collection_from_tokens(
                    new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
                )
                # A non-None result replaces the whole token list so far and
                # this instruction needs no further processing.
                if next_tokens is not None:
                    new_tokens = next_tokens
                    continue

            if op in self.opc.CONST_OPS:
                const = co.co_consts[oparg]
                if iscode(const):
                    # The operand becomes the code object itself and the
                    # opcode name says what kind of code it is.
                    oparg = const
                    if const.co_name == "<lambda>":
                        assert op_name == "LOAD_CONST"
                        op_name = "LOAD_LAMBDA"
                    elif const.co_name == "<genexpr>":
                        op_name = "LOAD_GENEXPR"
                    elif const.co_name == "<dictcomp>":
                        op_name = "LOAD_DICTCOMP"
                    elif const.co_name == "<setcomp>":
                        op_name = "LOAD_SETCOMP"
                    else:
                        op_name = "LOAD_CODE"
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = "<code_object " + const.co_name + ">"
                else:
                    # NOTE(review): neither argval nor _ is used after this
                    # call, and co.co_consts[oparg] was already indexed above.
                    if oparg < len(co.co_consts):
                        argval, _ = _get_const_info(oparg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif op in self.opc.NAME_OPS:
                pattr = names[oparg]
            elif op in self.opc.JREL_OPS:
                # use instead: hasattr(self, 'patch_continue'): ?
                if self.version[:2] == (2, 7):
                    self.patch_continue(new_tokens, offset, op)
                # Relative jump: the target is measured from the end of this
                # 3-byte instruction.
                pattr = repr(offset + 3 + oparg)
            elif op in self.opc.JABS_OPS:
                # use instead: hasattr(self, 'patch_continue'): ?
                if self.version[:2] == (2, 7):
                    self.patch_continue(new_tokens, offset, op)
                pattr = repr(oparg)
            elif op in self.opc.LOCAL_OPS:
                pattr = varnames[oparg]
            elif op in self.opc.COMPARE_OPS:
                pattr = self.opc.cmp_op[oparg]
            elif op in self.opc.FREE_OPS:
                pattr = free[oparg]

        if op in self.varargs_ops:
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if (
                op == self.opc.BUILD_TUPLE
                and self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE
            ):
                continue
            else:
                if self.is_pypy and not oparg and op_name == "BUILD_MAP":
                    op_name = "BUILD_MAP_n"
                else:
                    op_name = "%s_%d" % (op_name, oparg)
                    pass
                # FIXME: Figure out why this is needed and remove.
                customize[op_name] = oparg
        elif op == self.opc.JUMP_ABSOLUTE:
            # Further classify JUMP_ABSOLUTE into backward jumps
            # which are used in loops, and "CONTINUE" jumps which
            # may appear in a "continue" statement.  The loop-type
            # and continue-type jumps will help us classify loop
            # boundaries. The continue-type jumps help us get
            # "continue" statements which would otherwise be turned
            # into a "pass" statement because JUMPs are sometimes
            # ignored in rules as just boundary overhead. In
            # comprehensions we might sometimes classify JUMP_BACK
            # as CONTINUE, but that's okay since we add a grammar
            # rule for that.
            target = self.get_target(offset)
            if target <= offset:
                op_name = "JUMP_BACK"

                # 'Continue's include jumps to loops that are not
                # at the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                # If the JUMP_ABSOLUTE is
                #   either to a FOR_ITER or the instruction after a SETUP_LOOP
                #   and it is followed by another JUMP_FORWARD
                # then we'll take it as a "continue".
                j = self.offset2inst_index.get(offset)

                # EXTENDED_ARG doesn't appear in instructions,
                # but is instead the next opcode folded into it, and has the offset
                # of the EXTENDED_ARG. Therefore in self.offset2inst_index we'll find
                # the instruction at the previous EXTENDED_ARG offset which is 3
                # bytes back.
                # NOTE(review): if j is None and offset <= ARG_MAX_VALUE, j stays
                # None and "j + 1" below would raise TypeError whenever the first
                # half of the "and" is true -- confirm this cannot happen.
                if j is None and offset > self.opc.ARG_MAX_VALUE:
                    j = self.offset2inst_index[offset - 3]

                target_index = self.offset2inst_index[target]
                is_continue = (
                    self.insts[target_index - 1].opname == "SETUP_LOOP"
                    and self.insts[j + 1].opname == "JUMP_FORWARD"
                )
                if is_continue:
                    op_name = "CONTINUE"
                if offset in self.stmts and self.code[offset + 3] not in (
                    self.opc.END_FINALLY,
                    self.opc.POP_BLOCK,
                ):
                    if (
                        (
                            offset in self.linestarts
                            and self.code[self.prev[offset]]
                            == self.opc.JUMP_ABSOLUTE
                        )
                        or self.code[target] == self.opc.FOR_ITER
                        or offset not in self.not_continue
                    ):
                        op_name = "CONTINUE"

        elif op == self.opc.LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = "LOAD_ASSERT"
        elif op == self.opc.RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = "RETURN_END_IF"

        linestart = self.linestarts.get(offset, None)

        # Emit the token, using the *_CONT name recorded earlier in
        # "replace" when there is one for this offset.
        if offset not in replace:
            new_tokens.append(
                Token(
                    op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
                )
            )
        else:
            new_tokens.append(
                Token(
                    replace[offset],
                    oparg,
                    pattr,
                    offset,
                    linestart,
                    op,
                    has_arg,
                    self.opc,
                )
            )
            pass
        pass

    if show_asm in ("both", "after"):
        print("\n# ---- tokenization:")
        # FIXME: t.format() is changing tokens!
        for t in new_tokens.copy():
            print(t.format(line_prefix=""))
        print()
    return new_tokens, customize
def build_statement_indices(self):
    """Compute ``self.stmts`` and ``self.next_stmt`` for ``self.code``.

    ``self.stmts`` ends up as the set of offsets taken to start a
    statement. ``self.next_stmt`` is a list with one entry per bytecode
    offset; each entry is the offset of a following statement start, or
    ``len(self.code)`` once there are no more.
    """
    code = self.code
    opc = self.opc
    start = 0
    end = len(code)

    # Two-instruction sequences (a conditional jump directly followed by
    # an unconditional one) that also count as a statement.
    stmt_opcode_seqs = frozenset(
        [
            (opc.PJIF, opc.JUMP_FORWARD),
            (opc.PJIF, opc.JUMP_ABSOLUTE),
            (opc.PJIT, opc.JUMP_FORWARD),
            (opc.PJIT, opc.JUMP_ABSOLUTE),
        ]
    )

    prelim = self.all_instr(start, end, self.statement_opcodes)

    stmts = self.stmts = set(prelim)
    pass_stmts = set()
    for seq in stmt_opcode_seqs:
        for first_offset in self.op_range(start, end - (len(seq) + 1)):
            cursor = first_offset
            for wanted_op in seq:
                if code[cursor] != wanted_op:
                    break
                cursor += instruction_size(code[cursor], opc)
            else:
                # Whole sequence matched: record the instruction that
                # precedes the position just past it.
                seq_tail = self.prev[cursor]
                stmts.add(seq_tail)
                pass_stmts.add(seq_tail)

    # Adding offsets above can break the ordering of "prelim", so re-sort
    # only when something was actually added.
    stmt_list = sorted(stmts) if pass_stmts else prelim

    last_stmt = -1
    slist = self.next_stmt = []
    filled_to = 0
    for s in stmt_list:
        op = code[s]
        drop = False
        if op == opc.JUMP_ABSOLUTE and s not in pass_stmts:
            target = self.get_target(s)
            if target > s or (
                self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no
            ):
                drop = True
            else:
                j = self.prev[s]
                while code[j] == opc.JUMP_ABSOLUTE:
                    j = self.prev[j]
                # list comprehension
                drop = (
                    self.version >= (2, 3)
                    and self.opname_for_offset(j) == "LIST_APPEND"
                )
        elif op == opc.POP_TOP:
            # The POP_TOP in:
            #   ROT_TWO, POP_TOP,
            #   RETURN_xxx, POP_TOP (in 2.6-), or
            #   JUMP_IF_{FALSE,TRUE}, POP_TOP  (in 2.6-)
            # is part of the previous instruction and not the
            # beginning of a new statement.
            before = code[self.prev[s]]
            drop = before == opc.ROT_TWO or (
                self.version < (2, 7)
                and before
                in (
                    opc.JUMP_IF_FALSE,
                    opc.JUMP_IF_TRUE,
                    opc.RETURN_VALUE,
                )
            )
        elif op in self.designator_ops:
            j = self.prev[s]
            while code[j] in self.designator_ops:
                j = self.prev[j]
            drop = self.version > (2, 1) and code[j] == opc.FOR_ITER

        if drop:
            stmts.remove(s)
            continue

        last_stmt = s
        # Every offset from the previous statement start up to (but not
        # including) s has s as its next statement.
        slist.extend([s] * (s - filled_to))
        filled_to = s
    slist.extend([end] * (end - len(slist)))
def next_except_jump(self, start):
    """
    Return the next jump that was generated by an except SomeException:
    construct in a try...except...else clause or None if not found.

    ``start`` is the bytecode offset to search from. The offset that is
    returned is also added to ``self.not_continue``, and on the DUP_TOP
    path the matched conditional jump is added to ``self.ignore_if``.
    When the scan loop at the end runs out without finding an
    END_FINALLY at the right nesting level, the function falls off the
    end and so returns None.
    """

    if self.code[start] == self.opc.DUP_TOP:
        # Look for the conditional jump (PJIF) at or after "start".
        except_match = self.first_instr(start, len(self.code), self.opc.PJIF)
        if except_match:
            # The instruction just before the conditional jump's target.
            jmp = self.prev[self.get_target(except_match)]

            # In Python < 2.7 we may have jumps to jumps
            if self.version < (2, 7) and self.code[jmp] in self.jump_forward:
                self.not_continue.add(jmp)
                jmp = self.get_target(jmp)
                prev_offset = self.prev[except_match]
                # COMPARE_OP argument should be "exception-match" or 10
                if (
                    self.code[prev_offset] == self.opc.COMPARE_OP
                    and self.code[prev_offset + 1] != 10
                ):
                    return None
                # NOTE(review): jmp is an offset here, while pop_jump_if and
                # jump_forward are built from opcode values -- confirm whether
                # self.code[jmp] was intended.
                if jmp not in self.pop_jump_if | self.jump_forward:
                    self.ignore_if.add(except_match)
                    return None

            self.ignore_if.add(except_match)
            self.not_continue.add(jmp)
            return jmp

    # Otherwise scan forward for the END_FINALLY seen when as many
    # END_FINALLYs as SETUP_* instructions have been passed, and return
    # the instruction preceding it.
    count_END_FINALLY = 0
    count_SETUP_ = 0
    for i in self.op_range(start, len(self.code)):
        op = self.code[i]
        if op == self.opc.END_FINALLY:
            if count_END_FINALLY == count_SETUP_:
                if self.version[:2] == (2, 7):
                    assert self.code[self.prev[i]] in self.jump_forward | frozenset(
                        [self.opc.RETURN_VALUE]
                    )
                self.not_continue.add(self.prev[i])
                return self.prev[i]
            count_END_FINALLY += 1
        elif op in self.setup_ops:
            count_SETUP_ += 1
- def detect_control_flow(self, offset, op, extended_arg):
- """
- Detect type of block structures and their boundaries to fix optimized jumps
- in python2.3+
- """
- code = self.code
- # Detect parent structure
- parent = self.structs[0]
- start = parent["start"]
- end = parent["end"]
- next_line_byte = end
- # Pick inner-most parent for our offset
- for struct in self.structs:
- current_start = struct["start"]
- current_end = struct["end"]
- if (current_start <= offset < current_end) and (
- current_start >= start and current_end <= end
- ):
- start = current_start
- end = current_end
- parent = struct
- if op == self.opc.SETUP_LOOP:
- # We categorize loop types: 'for', 'while', 'while 1' with
- # possibly suffixes '-loop' and '-else'
- # Try to find the jump_back instruction of the loop.
- # It could be a return instruction.
- inst = self.insts[self.offset2inst_index[offset]]
- start += instruction_size(op, self.opc)
- setup_target = inst.argval
- loop_end_offset = self.restrict_to_parent(setup_target, parent)
- self.setup_loop_targets[offset] = setup_target
- self.setup_loops[setup_target] = offset
- if setup_target != loop_end_offset:
- self.fixed_jumps[offset] = loop_end_offset
- if self.lines:
- (line_no, next_line_byte) = self.lines[offset]
- # jump_back_offset is the instruction after the SETUP_LOOP
- # where we iterate back to.
- jump_back_offset = self.last_instr(
- start, loop_end_offset, self.opc.JUMP_ABSOLUTE, next_line_byte, False
- )
- if jump_back_offset:
- # Account for the fact that < 2.7 has an explicit
- # POP_TOP instruction in the equivalate POP_JUMP_IF
- # construct
- if self.version < (2, 7):
- jump_forward_offset = jump_back_offset + 4
- return_val_offset1 = self.prev[
- self.prev[self.prev[loop_end_offset]]
- ]
- # Is jump back really "back"?
- jump_target = self.get_target(
- jump_back_offset, code[jump_back_offset]
- )
- if jump_target > jump_back_offset or code[jump_back_offset + 3] in [
- self.opc.JUMP_FORWARD,
- self.opc.JUMP_ABSOLUTE,
- ]:
- jump_back_offset = None
- pass
- else:
- jump_forward_offset = jump_back_offset + 3
- return_val_offset1 = self.prev[self.prev[loop_end_offset]]
- if (
- jump_back_offset
- and jump_back_offset != self.prev[loop_end_offset]
- and code[jump_forward_offset] in self.jump_forward
- ):
- if code[self.prev[loop_end_offset]] == self.opc.RETURN_VALUE or (
- code[self.prev[loop_end_offset]] == self.opc.POP_BLOCK
- and code[return_val_offset1] == self.opc.RETURN_VALUE
- ):
- jump_back_offset = None
- if not jump_back_offset:
- # loop suite ends in return
- # scanner26 of wbiti had:
- # jump_back_offset = self.last_instr(start, loop_end_offset, self.opc.JUMP_ABSOLUTE, start, False)
- jump_back_offset = self.last_instr(
- start, loop_end_offset, self.opc.RETURN_VALUE
- )
- if not jump_back_offset:
- return
- jump_back_offset += 1
- if_offset = None
- if self.version < (2, 7):
- # Look for JUMP_IF POP_TOP ...
- if code[self.prev[next_line_byte]] == self.opc.POP_TOP and (
- code[self.prev[self.prev[next_line_byte]]] in self.pop_jump_if
- ):
- if_offset = self.prev[self.prev[next_line_byte]]
- elif code[self.prev[next_line_byte]] in self.pop_jump_if:
- # Look for POP_JUMP_IF ...
- if_offset = self.prev[next_line_byte]
- if if_offset:
- loop_type = "while"
- self.ignore_if.add(if_offset)
- if self.version < (2, 7) and (
- code[self.prev[jump_back_offset]] == self.opc.RETURN_VALUE
- ):
- self.ignore_if.add(self.prev[jump_back_offset])
- pass
- pass
- else:
- loop_type = "for"
- setup_target = next_line_byte
- loop_end_offset = jump_back_offset + 3
- else:
- # We have a loop with a jump-back instruction
- if self.get_target(jump_back_offset) >= next_line_byte:
- jump_back_offset = self.last_instr(
- start, loop_end_offset, self.opc.JUMP_ABSOLUTE, start, False
- )
- if (
- loop_end_offset > jump_back_offset + 4
- and code[loop_end_offset] in self.jump_forward
- ):
- if code[jump_back_offset + 4] in self.jump_forward:
- if self.get_target(jump_back_offset + 4) == self.get_target(
- loop_end_offset
- ):
- self.fixed_jumps[offset] = jump_back_offset + 4
- loop_end_offset = jump_back_offset + 4
- elif setup_target < offset:
- self.fixed_jumps[offset] = jump_back_offset + 4
- loop_end_offset = jump_back_offset + 4
- setup_target = self.get_target(jump_back_offset, self.opc.JUMP_ABSOLUTE)
- if self.version > (2, 1) and code[setup_target] in (
- self.opc.FOR_ITER,
- self.opc.GET_ITER,
- ):
- loop_type = "for"
- else:
- loop_type = "while"
- # Look for a test condition immediately after the
- # SETUP_LOOP while
- if (
- self.version < (2, 7)
- and self.code[self.prev[next_line_byte]] == self.opc.POP_TOP
- ):
- test_op_offset = self.prev[self.prev[next_line_byte]]
- else:
- test_op_offset = self.prev[next_line_byte]
- if test_op_offset == offset:
- loop_type = "while 1"
- elif self.code[test_op_offset] in self.opc.JUMP_OPs:
- test_target = self.get_target(test_op_offset)
- self.ignore_if.add(test_op_offset)
- if test_target > (jump_back_offset + 3):
- jump_back_offset = test_target
- self.not_continue.add(jump_back_offset)
- self.loops.append(setup_target)
- self.structs.append(
- {
- "type": loop_type + "-loop",
- "start": setup_target,
- "end": jump_back_offset,
- }
- )
- if jump_back_offset + 3 != loop_end_offset:
- self.structs.append(
- {
- "type": loop_type + "-else",
- "start": jump_back_offset + 3,
- "end": loop_end_offset,
- }
- )
- elif op == self.opc.SETUP_EXCEPT:
- start = offset + instruction_size(op, self.opc)
- target = self.get_target(offset, op)
- end_offset = self.restrict_to_parent(target, parent)
- if target != end_offset:
- self.fixed_jumps[offset] = end_offset
- # print target, end, parent
- # Add the try block
- self.structs.append(
- {"type": "try", "start": start - 3, "end": end_offset - 4}
- )
- # Now isolate the except and else blocks
- end_else = start_else = self.get_target(self.prev[end_offset])
- end_finally_offset = end_offset
- setup_except_nest = 0
- while end_finally_offset < len(self.code):
- if self.code[end_finally_offset] == self.opc.END_FINALLY:
- if setup_except_nest == 0:
- break
- else:
- setup_except_nest -= 1
- elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
- setup_except_nest += 1
- end_finally_offset += instruction_size(
- code[end_finally_offset], self.opc
- )
- pass
- # Add the except blocks
- i = end_offset
- while i < len(self.code) and i < end_finally_offset:
- jmp = self.next_except_jump(i)
- if jmp is None: # check
- i = self.next_stmt[i]
- continue
- if self.code[jmp] == self.opc.RETURN_VALUE:
- self.structs.append({"type": "except", "start": i, "end": jmp + 1})
- i = jmp + 1
- else:
- target = self.get_target(jmp)
- if target != start_else:
- end_else = self.get_target(jmp)
- if self.code[jmp] == self.opc.JUMP_FORWARD:
- if self.version <= (2, 6):
- self.fixed_jumps[jmp] = target
- else:
- self.fixed_jumps[jmp] = -1
- self.structs.append({"type": "except", "start": i, "end": jmp})
- i = jmp + 3
- # Add the try-else block
- if end_else != start_else:
- r_end_else = self.restrict_to_parent(end_else, parent)
- # May be able to drop the 2.7 test.
- if self.version[:2] == (2, 7):
- self.structs.append(
- {"type": "try-else", "start": i + 1, "end": r_end_else}
- )
- self.fixed_jumps[i] = r_end_else
- else:
- self.fixed_jumps[i] = i + 1
- elif op in self.pop_jump_if:
- target = self.get_target(offset, op)
- rtarget = self.restrict_to_parent(target, parent)
- # Do not let jump to go out of parent struct bounds
- if target != rtarget and parent["type"] == "and/or":
- self.fixed_jumps[offset] = rtarget
- return
- jump_if_offset = offset
- start = offset + 3
- pre = self.prev
- # Does this jump to right after another conditional jump that is
- # not myself? If so, it's part of a larger conditional.
- # rocky: if we have a conditional jump to the next instruction, then
- # possibly I am "skipping over" a "pass" or null statement.
- test_target = target
- if self.version < (2, 7):
- # Before 2.7 we have to deal with the fact that there is an extra
- # POP_TOP that is logically associated with the JUMP_IF's (even though
- # the instance set is called "self.pop_jump_if")
- if code[pre[test_target]] == self.opc.POP_TOP:
- test_target = pre[test_target]
- test_set = self.pop_jump_if
- else:
- test_set = self.pop_jump_if_or_pop | self.pop_jump_if
- if code[pre[test_target]] in test_set and target > offset:
- # We have POP_JUMP_IF... target
- # ...
- # pre: POP_JUMP_IF ...
- # target: ...
- #
- # We will take that as either as "and" or "or".
- self.fixed_jumps[offset] = pre[target]
- self.structs.append(
- {"type": "and/or", "start": start, "end": pre[target]}
- )
- return
- # The instruction offset just before the target jump offset is important
- # in making a determination of what we have. Save that.
- pre_rtarget = pre[rtarget]
- # Is it an "and" inside an "if" or "while" block
- if op == self.opc.PJIF:
- # Search for other POP_JUMP_IF_...'s targeting the
- # same target, of the current POP_JUMP_... instruction,
- # starting from current offset, and filter everything inside inner 'or'
- # jumps and mid-line ifs
- match = self.rem_or(
- start, self.next_stmt[offset], self.opc.PJIF, target
- )
- # If we still have any offsets in set, start working on it
- if match:
- if (
- code[pre_rtarget] in self.jump_forward
- and pre_rtarget not in self.stmts
- and self.restrict_to_parent(
- self.get_target(pre_rtarget), parent
- )
- == rtarget
- ):
- if (
- code[pre[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
- and self.remove_mid_line_ifs([offset])
- and target == self.get_target(pre[pre_rtarget])
- and (
- pre[pre_rtarget] not in self.stmts
- or self.get_target(pre[pre_rtarget]) > pre[pre_rtarget]
- )
- and 1
- == len(
- self.remove_mid_line_ifs(
- self.rem_or(
- start,
- pre[pre_rtarget],
- self.pop_jump_if,
- target,
- )
- )
- )
- ):
- pass
- elif (
- code[pre[pre_rtarget]] == self.opc.RETURN_VALUE
- and self.remove_mid_line_ifs([offset])
- and 1
- == (
- len(
- set(
- self.remove_mid_line_ifs(
- self.rem_or(
- start,
- pre[pre_rtarget],
- self.pop_jump_if,
- target,
- )
- )
- )
- | set(
- self.remove_mid_line_ifs(
- self.rem_or(
- start,
- pre[pre_rtarget],
- (
- self.opc.PJIF,
- self.opc.PJIT,
- self.opc.JUMP_ABSOLUTE,
- ),
- pre_rtarget,
- True,
- )
- )
- )
- )
- )
- ):
- pass
- else:
- fix = None
- jump_ifs = self.all_instr(
- start, self.next_stmt[offset], self.opc.PJIF
- )
- last_jump_good = True
- for j in jump_ifs:
- if target == self.get_target(j):
- if self.lines[j].next == j + 3 and last_jump_good:
- fix = j
- break
- else:
- last_jump_good = False
- self.fixed_jumps[offset] = fix or match[-1]
- return
- else:
- if self.version < (2, 7) and parent["type"] in (
- "root",
- "for-loop",
- "if-then",
- "else",
- "try",
- ):
- self.fixed_jumps[offset] = rtarget
- else:
- # note test for < 2.7 might be superfluous although informative
- # for 2.7 a different branch is taken and the below code is handled
- # under: elif op in self.pop_jump_if_or_pop
- # below
- self.fixed_jumps[offset] = match[-1]
- return
- else: # op != self.opc.PJIT
- if self.version < (2, 7) and code[offset + 3] == self.opc.POP_TOP:
- assert_offset = offset + 4
- else:
- assert_offset = offset + 3
- if (assert_offset) in self.load_asserts:
- if code[pre_rtarget] == self.opc.RAISE_VARARGS:
- return
- self.load_asserts.remove(assert_offset)
- next = self.next_stmt[offset]
- if pre[next] == offset:
- pass
- elif code[next] in self.jump_forward and target == self.get_target(
- next
- ):
- if code[pre[next]] == self.opc.PJIF:
- if (
- code[next] == self.opc.JUMP_FORWARD
- or target != rtarget
- or code[pre[pre_rtarget]]
- not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
- ):
- self.fixed_jumps[offset] = pre[next]
- return
- elif (
- code[next] == self.opc.JUMP_ABSOLUTE
- and code[target] in self.jump_forward
- ):
- next_target = self.get_target(next)
- if self.get_target(target) == next_target:
- self.fixed_jumps[offset] = pre[next]
- return
- elif code[next_target] in self.jump_forward and self.get_target(
- next_target
- ) == self.get_target(target):
- self.fixed_jumps[offset] = pre[next]
- return
- # don't add a struct for a while test, it's already taken care of
- if offset in self.ignore_if:
- return
- if self.version == (2, 7):
- if (
- code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
- and pre_rtarget in self.stmts
- and pre_rtarget != offset
- and pre[pre_rtarget] != offset
- ):
- if (
- code[rtarget] == self.opc.JUMP_ABSOLUTE
- and code[rtarget + 3] == self.opc.POP_BLOCK
- ):
- if code[pre[pre_rtarget]] != self.opc.JUMP_ABSOLUTE:
- pass
- elif self.get_target(pre[pre_rtarget]) != target:
- pass
- else:
- rtarget = pre_rtarget
- else:
- rtarget = pre_rtarget
- pre_rtarget = pre[rtarget]
- # Does the "jump if" jump beyond a jump op?
- # That is, we have something like:
- # POP_JUMP_IF_FALSE HERE
- # ...
- # JUMP_FORWARD
- # HERE:
- #
- # If so, this can be a block inside an "if" statement
- # or a conditional assignment like:
- # x = 1 if x else 2
- #
- # There are other situations we may need to consider, like
- # if the condition jump is to a forward location.
- # Also the existence of a jump to the instruction after "END_FINALLY"
- # will distinguish "try/else" from "try".
- code_pre_rtarget = code[pre_rtarget]
- if code_pre_rtarget in self.jump_forward:
- if_end = self.get_target(pre_rtarget)
- # Is this a loop and not an "if" statement?
- if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):
- if if_end > start:
- return
- else:
- # We still have the case in 2.7 that the next instruction
- # is a jump to a SETUP_LOOP target.
- next_offset = target + instruction_size(
- self.code[target], self.opc
- )
- next_op = self.code[next_offset]
- if self.op_name(next_op) == "JUMP_FORWARD":
- jump_target = self.get_target(next_offset, next_op)
- if jump_target in self.setup_loops:
- self.structs.append(
- {
- "type": "while-loop",
- "start": jump_if_offset,
- "end": jump_target,
- }
- )
- self.fixed_jumps[jump_if_offset] = jump_target
- return
- end_offset = self.restrict_to_parent(if_end, parent)
- if_then_maybe = None
- if (2, 2) <= self.version <= (2, 6):
- # Take the JUMP_IF target. In an "if/then", it will be
- # a POP_TOP instruction and the instruction before it
- # will be a JUMP_FORWARD to just after the POP_TOP.
- # For example:
- # Good:
- # 3 JUMP_IF_FALSE 33 'to 39'
- # ..
- # 36 JUMP_FORWARD 1 'to 40'
- # 39 POP_TOP
- # 40 ...
- # example:
- # BAD (is an "and"):
- # 28 JUMP_IF_FALSE 4 'to 35'
- # ...
- # 32 JUMP_ABSOLUTE 40 'to 40' # should be 36 or there should
- # # be a COME_FROM at the pop top
- # # before 40 to 35
- # 35 POP_TOP
- # 36 ...
- # 39 POP_TOP
- # 39_0 COME_FROM 3
- # 40 ...
- if self.opname_for_offset(jump_if_offset).startswith("JUMP_IF"):
- jump_if_target = code[jump_if_offset + 1]
- if (
- self.opname_for_offset(jump_if_target + jump_if_offset + 3)
- == "POP_TOP"
- ):
- jump_inst = jump_if_target + jump_if_offset
- jump_offset = code[jump_inst + 1]
- jump_op = self.opname_for_offset(jump_inst)
- if jump_op == "JUMP_FORWARD" and jump_offset == 1:
- self.structs.append(
- {
- "type": "if-then",
- "start": start - 3,
- "end": pre_rtarget,
- }
- )
- self.thens[start] = end_offset
- elif jump_op == "JUMP_ABSOLUTE":
- if_then_maybe = {
- "type": "if-then",
- "start": start - 3,
- "end": pre_rtarget,
- }
- elif self.version[:2] == (2, 7):
- self.structs.append(
- {"type": "if-then", "start": start - 3, "end": pre_rtarget}
- )
- # FIXME: this is yet another case were we need dominators.
- if pre_rtarget not in self.linestarts or self.version < (2, 7):
- self.not_continue.add(pre_rtarget)
- if rtarget < end_offset:
- # We have an "else" block of some kind.
- # Is it associated with "if_then_maybe" seen above?
- # These will be linked in this funny way:
- # 198 JUMP_IF_FALSE 18 'to 219'
- # 201 POP_TOP
- # ...
- # 216 JUMP_ABSOLUTE 256 'to 256'
- # 219 POP_TOP
- # ...
- # 252 JUMP_FORWARD 1 'to 256'
- # 255 POP_TOP
- # 256
- if if_then_maybe and jump_op == "JUMP_ABSOLUTE":
- jump_target = self.get_target(jump_inst, code[jump_inst])
- if self.opname_for_offset(end_offset) == "JUMP_FORWARD":
- end_target = self.get_target(end_offset, code[end_offset])
- if jump_target == end_target:
- self.structs.append(if_then_maybe)
- self.thens[start] = end_offset
- self.structs.append(
- {"type": "else", "start": rtarget, "end": end_offset}
- )
- elif code_pre_rtarget == self.opc.RETURN_VALUE:
- if self.version[:2] == (2, 7) or pre_rtarget not in self.ignore_if:
- # Below, 10 is exception-match. If there is an exception
- # match in the compare, then this is an exception
- # clause not an if-then clause
- if (
- self.code[self.prev[offset]] != self.opc.COMPARE_OP
- or self.code[self.prev[offset] + 1] != 10
- ):
- self.structs.append(
- {"type": "if-then", "start": start, "end": rtarget}
- )
- self.thens[start] = rtarget
- if (
- self.version[:2] == (2, 7)
- or code[pre_rtarget + 1] != self.opc.JUMP_FORWARD
- ):
- # The below is a big hack until we get
- # better control flow analysis: disallow
- # END_IF if the instruction before the
- # END_IF instruction happens to be a jump
- # target. In this case, probably what's
- # gone on is that we messed up on the
- # END_IF location and it should be the
- # instruction before.
- self.fixed_jumps[offset] = rtarget
- if (
- self.version[:2] == (2, 7)
- and self.insts[
- self.offset2inst_index[pre[pre_rtarget]]
- ].is_jump_target
- ):
- self.return_end_ifs.add(pre[pre_rtarget])
- pass
- else:
- self.return_end_ifs.add(pre_rtarget)
- pass
- pass
- pass
- elif op in self.pop_jump_if_or_pop:
- target = self.get_target(offset, op)
- self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, debug):
    """
    Detect all offsets in a byte code which are jump targets
    where we might insert a pseudo "COME_FROM" instruction.

    "COME_FROM" instructions are used in detecting overall
    control flow. The more detailed information about the
    control flow is captured in self.structs.
    Since this stuff is tricky, consult self.structs when
    something goes amiss.

    As a side effect this (re)initializes the analysis state kept on
    the instance: self.structs, self.loops, self.fixed_jumps,
    self.ignore_if, self.not_continue, self.return_end_ifs,
    self.setup_loop_targets, self.setup_loops and self.thens.

    `debug` selects diagnostic output: when it is "both" or "after",
    the computed targets and self.structs are printed before returning.

    Return a dictionary that maps each jump-target offset to the list
    of source offsets recorded for it (the offset of the jumping
    instruction, or the self.setup_loops entry for that target when
    there is one). An instruction can be jumped to from multiple
    instructions.
    """
    code = self.code
    n = len(code)
    self.structs = [{"type": "root", "start": 0, "end": n - 1}]

    # All loop entry points
    self.loops = []

    # Map fixed jumps to their real destination
    self.fixed_jumps = {}
    self.ignore_if = set()
    self.build_statement_indices()

    # Containers filled by detect_control_flow()
    self.not_continue = set()
    self.return_end_ifs = set()
    self.setup_loop_targets = {}  # target given setup_loop offset
    self.setup_loops = {}  # setup_loop offset given target
    self.thens = {}  # JUMP_IF's that separate the 'then' part of an 'if'

    targets = {}
    extended_arg = 0
    for offset in self.op_range(0, n):
        op = code[offset]

        if op == self.opc.EXTENDED_ARG:
            # Fold this prefix into extended_arg; it is handed to
            # detect_control_flow() for the following instruction and
            # reset to 0 once that instruction has been processed.
            arg = code2num(code, offset + 1) | extended_arg
            extended_arg += self.extended_arg_val(arg)
            continue

        # Determine structures and fix jumps in Python versions
        # since 2.3
        # NOTE: this must run before self.fixed_jumps is consulted below.
        self.detect_control_flow(offset, op, extended_arg)

        if op_has_argument(op, self.opc):
            label = self.fixed_jumps.get(offset)
            oparg = self.get_argument(offset)

            if label is None:
                if op in self.opc.JREL_OPS and self.op_name(op) != "FOR_ITER":
                    # Relative jump: the operand is added to the offset
                    # just past this 3-byte instruction.
                    label = offset + 3 + oparg
                elif (
                    self.version[:2] == (2, 7)
                    and op in self.opc.JABS_OPS
                    and op
                    in (
                        self.opc.JUMP_IF_FALSE_OR_POP,
                        self.opc.JUMP_IF_TRUE_OR_POP,
                    )
                    and oparg > offset
                ):
                    # Only forward JUMP_IF_..._OR_POP jumps are recorded.
                    label = oparg

            # FIXME FIXME FIXME
            # All the conditions are horrible, and I am not sure I
            # understand fully what's going on.
            # We REALLY REALLY need a better way to handle control flow
            # Especially for < 2.7
            if label is not None and label != -1:
                if self.version[:2] == (2, 7):
                    # FIXME: rocky: I think we need something like this...
                    source = self.setup_loops.get(label, offset)
                    targets.setdefault(label, []).append(source)
                elif not (
                    code[label] == self.opc.POP_TOP
                    and code[self.prev[label]] == self.opc.RETURN_VALUE
                ):
                    # In Python < 2.7, don't add a COME_FROM, for:
                    #     ~RETURN_VALUE POP_TOP .. END_FINALLY
                    # or:
                    #     ~RETURN_VALUE POP_TOP .. POP_TOP END_FINALLY
                    skip_come_from = code[offset + 3] == self.opc.END_FINALLY or (
                        code[offset + 3] == self.opc.POP_TOP
                        and code[offset + 4] == self.opc.END_FINALLY
                    )

                    # The below is for special try/else handling
                    if skip_come_from and op == self.opc.JUMP_FORWARD:
                        skip_come_from = False

                    # FIXME: rocky: I think we need something like this...
                    if not skip_come_from and offset not in self.ignore_if:
                        source = self.setup_loops.get(label, offset)
                        # FIXME: The grammar for 2.6 and before doesn't
                        # handle COME_FROM's from a loop inside if's
                        # It probably should.
                        if (
                            self.version > (2, 6)
                            or self.code[source] != self.opc.SETUP_LOOP
                            or self.code[label] != self.opc.JUMP_FORWARD
                        ):
                            targets.setdefault(label, []).append(source)
        elif (
            op == self.opc.END_FINALLY
            and offset in self.fixed_jumps
            and self.version[:2] == (2, 7)
        ):
            label = self.fixed_jumps[offset]
            targets.setdefault(label, []).append(offset)

        extended_arg = 0

    # DEBUG:
    if debug in ("both", "after"):
        print(targets)
        import pprint as pp

        pp.pprint(self.structs)

    return targets
def patch_continue(self, tokens, offset, op):
    """
    Relabel the last token from "JUMP_BACK" to "CONTINUE" when `op` is a
    JUMP_FORWARD or JUMP_ABSOLUTE and the byte at offset + 3 in
    self.code is END_FINALLY.

    FIXME: this is a hack to catch stuff like:
        for ...
            try: ...
            except: continue
    where the "continue" is not on a new line.

    `tokens` is changed in place (only the `kind` of its last element);
    nothing is returned. Lists of two or fewer tokens are left alone.
    """
    jump_opcodes = (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE)
    if op not in jump_opcodes:
        return
    if len(tokens) <= 2:
        return

    last_token = tokens[-1]
    if last_token.kind != "JUMP_BACK":
        return

    # Only a jump whose following byte is END_FINALLY is rewritten.
    if self.code[offset + 3] == self.opc.END_FINALLY:
        last_token.kind = intern("CONTINUE")
- # FIXME: combine with scanner3.py code and put into scanner.py
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
    """
    Find all <instr> in the block from start to end, then discard the
    ones that fall inside the span of a self.opc.PJIT jump in that block.

    <instr> is any python bytecode instruction or a collection of opcodes.
    If <instr> is an opcode with a target (like a jump), a target
    destination can be specified which must match precisely. When
    include_beyond_target is true, instructions whose target is at or
    beyond <target> are accepted as well.

    For every self.opc.PJIT instruction found between start and end,
    offsets that lie strictly after that instruction and before
    (its target - 3) are removed from the result.

    Return a list with indexes to them or [] if none found.
    """
    assert start >= 0 and end <= len(self.code) and start <= end

    # A single opcode does not support "in"; wrap it so the membership
    # test below works for one opcode and for a collection alike. Only
    # TypeError is expected here, anything else is a real error and
    # must not be hidden.
    try:
        None in instr
    except TypeError:
        instr = [instr]

    instr_offsets = []
    for i in self.op_range(start, end):
        op = self.code[i]
        if op not in instr:
            continue
        if target is None:
            instr_offsets.append(i)
            continue
        t = self.get_target(i, op)
        if (include_beyond_target and t >= target) or t == target:
            instr_offsets.append(i)

    for pjit in self.all_instr(start, end, self.opc.PJIT):
        tgt = self.get_target(pjit) - 3
        # Keep what is at or before the PJIT, or at/after its adjusted target.
        instr_offsets = [i for i in instr_offsets if i <= pjit or i >= tgt]
    return instr_offsets
|