#  Copyright (c) 2016-2017, 2020-2021 by Rocky Bernstein
"""
Python 3.0 bytecode scanner/deparser

This sets up the opcodes for Python 3.0 and calls a generalized
scanner routine for Python 3.
"""
from __future__ import print_function

import xdis
from xdis import instruction_size

# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_30 as opc

from uncompyle6.scanners.scanner3 import Scanner3

# The two Python 3.0 conditional-jump opcodes that this module tests
# membership against when classifying loops and compound conditions.
JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE])
class Scanner30(Scanner3):
    """Bytecode scanner for Python 3.0.

    Python 3.0 control-flow bytecode is more like Python 2.6 than later
    Python 3.x releases, so this class supplies its own
    ``detect_control_flow`` routine.
    """

    def __init__(self, show_asm=None, is_pypy=False):
        """Initialize the base scanner for bytecode version (3, 0).

        ``show_asm`` and ``is_pypy`` are forwarded unchanged to
        ``Scanner3.__init__``.
        """
        Scanner3.__init__(self, (3, 0), show_asm, is_pypy)

    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect structures and their boundaries to fix optimized jumps.

        Python 3.0 is more like Python 2.6 than it is Python 3.x.
        So we have a special routine here.

        The opcode examined is ``self.insts[inst_index].opcode``; ``offset``
        is the bytecode offset used for all jump/parent computations
        (presumably the offset of that same instruction -- confirm against
        the caller).  ``targets`` is indexed by offset and yields the
        offsets of instructions that jump there; it is only consulted on
        the ``self.version >= (3, 5)`` path.

        Nothing is returned.  Results are recorded on ``self``: entries are
        added to ``fixed_jumps``, ``structs``, ``loops``, ``setup_loops``,
        ``ignore_if``, ``not_continue`` and ``return_end_ifs`` (and, on the
        3.5+ path, removed from ``return_end_ifs``).
        """
        code = self.code
        op = self.insts[inst_index].opcode

        # Detect parent structure
        parent = self.structs[0]
        start = parent["start"]
        end = parent["end"]

        # Pick innermost parent for our offset
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                current_start >= start and current_end <= end
            ):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += instruction_size(op, self.opc)
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            # Only the second field (offset of the next source line) is used.
            _, next_line_byte = self.lines[offset]
            jump_back = self.last_instr(
                start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False
            )

            if jump_back:
                jump_forward_offset = xdis.next_offset(
                    code[jump_back], self.opc, jump_back
                )
            else:
                jump_forward_offset = None

            return_val_offset1 = self.prev[self.prev[end]]

            if (
                jump_back
                and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)
            ):
                # The candidate jump-back is followed by a forward jump and
                # the loop region ends in RETURN_VALUE (optionally followed
                # by POP_BLOCK): discard it and use the return instead.
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                    code[self.prev_op[end]] == self.opc.POP_BLOCK
                    and code[return_val_offset1] == self.opc.RETURN_VALUE
                ):
                    jump_back = None

            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                if_offset = None
                if code[self.prev_op[next_line_byte]] not in JUMP_TF:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                # NOTE(review): 3 is presumably the size of a Python 3.0
                # instruction that takes an argument -- confirm.
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(
                        start, end, self.opc.JUMP_ABSOLUTE, start, False
                    )

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        # NOTE(review): "+ 4" presumably skips the 3-byte
                        # jump plus a 1-byte instruction after it -- confirm.
                        if self.get_target(jump_back + 4) == self.get_target(end):
                            self.fixed_jumps[offset] = jump_back + 4
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back + 4
                    end = jb_next_offset

                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)

            self.loops.append(target)
            self.structs.append(
                {"type": loop_type + "-loop", "start": target, "end": jump_back}
            )
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            if self.get_inst(after_jump_offset).opname == "POP_TOP":
                after_jump_offset = xdis.next_offset(
                    code[after_jump_offset], self.opc, after_jump_offset
                )
            # Anything between the end of the loop body and the end of the
            # SETUP_LOOP region is recorded as the loop's "else" part.
            if after_jump_offset != end:
                self.structs.append(
                    {
                        "type": loop_type + "-else",
                        "start": after_jump_offset,
                        "end": end,
                    }
                )

        elif op in self.pop_jump_tf:
            start = offset + instruction_size(op, self.opc)
            target = self.get_target(offset)
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent["type"] == "and/or":
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.
            if (
                (code[prev_op[target]] in self.pop_jump_if_pop)
                and (target > offset)
                and prev_op[target] != offset
            ):
                self.fixed_jumps[offset] = prev_op[target]
                self.structs.append(
                    {"type": "and/or", "start": start, "end": prev_op[target]}
                )
                return

            # The op offset just before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == opc.JUMP_IF_FALSE:
                # Search for another JUMP_IF_FALSE targeting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(
                    start, self.next_stmt[offset], opc.JUMP_IF_FALSE, target
                )

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (
                        is_jump_forward
                        and pre_rtarget not in self.stmts
                        and self.restrict_to_parent(
                            self.get_target(pre_rtarget), parent
                        )
                        == rtarget
                    ):
                        if (
                            code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                            and self.remove_mid_line_ifs([offset])
                            and target == self.get_target(prev_op[pre_rtarget])
                            and (
                                prev_op[pre_rtarget] not in self.stmts
                                or self.get_target(prev_op[pre_rtarget])
                                > prev_op[pre_rtarget]
                            )
                            and 1
                            == len(
                                self.remove_mid_line_ifs(
                                    self.rem_or(
                                        start, prev_op[pre_rtarget], JUMP_TF, target
                                    )
                                )
                            )
                        ):
                            # Deliberately no fix-up here; fall through to
                            # the struct detection below.
                            pass
                        elif (
                            code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                            and self.remove_mid_line_ifs([offset])
                            and 1
                            == (
                                len(
                                    set(
                                        self.remove_mid_line_ifs(
                                            self.rem_or(
                                                start,
                                                prev_op[pre_rtarget],
                                                JUMP_TF,
                                                target,
                                            )
                                        )
                                    )
                                    | set(
                                        self.remove_mid_line_ifs(
                                            self.rem_or(
                                                start,
                                                prev_op[pre_rtarget],
                                                (
                                                    opc.JUMP_IF_FALSE,
                                                    opc.JUMP_IF_TRUE,
                                                    opc.JUMP_ABSOLUTE,
                                                ),
                                                pre_rtarget,
                                                True,
                                            )
                                        )
                                    )
                                )
                            )
                        ):
                            # Deliberately no fix-up here either.
                            pass
                        else:
                            fix = None
                            jump_ifs = self.inst_matches(
                                start, self.next_stmt[offset], opc.JUMP_IF_FALSE
                            )
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    # FIXME: remove magic number
                                    if self.lines[j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[offset] = match[-1]
                        return
            # op == JUMP_IF_TRUE
            else:
                next_stmt = self.next_stmt[offset]
                if prev_op[next_stmt] == offset:
                    # This jump is the last instruction of its statement;
                    # nothing to fix up here.
                    pass
                elif (
                    self.is_jump_forward(next_stmt)
                    and target == self.get_target(next_stmt)
                ):
                    if code[prev_op[next_stmt]] == opc.JUMP_IF_FALSE:
                        if (
                            code[next_stmt] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]]
                            not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
                        ):
                            self.fixed_jumps[offset] = prev_op[next_stmt]
                            return
                    elif (
                        code[next_stmt] == self.opc.JUMP_ABSOLUTE
                        and self.is_jump_forward(target)
                        and self.get_target(target) == self.get_target(next_stmt)
                    ):
                        self.fixed_jumps[offset] = prev_op[next_stmt]
                        return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            if (
                code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
                and pre_rtarget in self.stmts
                and pre_rtarget != offset
                and prev_op[pre_rtarget] != offset
                and not (
                    code[rtarget] == self.opc.JUMP_ABSOLUTE
                    and code[rtarget + 3] == self.opc.POP_BLOCK
                    and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE
                )
            ):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            if self.is_jump_forward(pre_rtarget):
                if_end = self.get_target(pre_rtarget, 0)

                # If the jump target is back, we are looping
                if if_end < pre_rtarget and (
                    code[prev_op[if_end]] == self.opc.SETUP_LOOP
                ):
                    if if_end > start:
                        return

                # NOTE(review): the result of this call is discarded; it was
                # presumably meant to be assigned (e.g. to ``end``) for the
                # commented-out "else" handling below -- confirm before
                # removing.
                self.restrict_to_parent(if_end, parent)

                self.structs.append(
                    {"type": "if-then", "start": start, "end": pre_rtarget}
                )
                self.not_continue.add(pre_rtarget)

                # if rtarget < end and (
                #         code[rtarget] not in (self.opc.END_FINALLY,
                #                               self.opc.JUMP_ABSOLUTE) and
                #         code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                #                                         self.opc.END_FINALLY)):
                #     self.structs.append({'type': 'else',
                #                          'start': rtarget,
                #                          'end': end})
                #     self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                self.structs.append(
                    {"type": "if-then", "start": start, "end": pre_rtarget}
                )
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP):
                self.structs.append({"type": "if-then", "start": start, "end": rtarget})
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
                        return
                # NOTE(review): __init__ passes (3, 0) as the version, so this
                # 3.5+ path is presumably never taken for this class --
                # confirm how Scanner3 stores ``self.version``.
                if self.version >= (3, 5):
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if come_from_op == self.opc.SETUP_EXCEPT:
                                return

                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    if self.version == (3, 0):
                        next_op = rtarget
                        # NOTE(review): this assignment leaves next_op
                        # unchanged; it may have been meant to step past the
                        # POP_TOP -- confirm before altering.
                        if code[next_op] == self.opc.POP_TOP:
                            next_op = rtarget
                        # A return that lands exactly on the end of a
                        # "while-loop" struct is not marked as a
                        # return-end-if.
                        for block in self.structs:
                            if (
                                block["type"] == "while-loop"
                                and block["end"] == next_op
                            ):
                                return
                        next_op += instruction_size(self.code[next_op], self.opc)
                        if code[next_op] == self.opc.POP_BLOCK:
                            return
                    self.return_end_ifs.add(pre_rtarget)
                else:
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)

        elif op == self.opc.SETUP_EXCEPT:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end

        elif op == self.opc.SETUP_FINALLY:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end

        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                unop_target = self.last_instr(
                    offset, target, self.opc.JUMP_FORWARD, target
                )
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)

        elif self.version >= (3, 5):
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                if (
                    offset + 1 < len(code)
                    and code[offset + 1] == self.opc.JUMP_ABSOLUTE
                    and offset in self.return_end_ifs
                ):
                    self.return_end_ifs.remove(offset)
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (
                    code[rtarget_prev] == self.opc.RETURN_VALUE
                    and rtarget_prev in self.return_end_ifs
                ):
                    # Walk backwards from the RETURN_VALUE to this jump; any
                    # intervening jump means the pattern above does not hold.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
# Demo entry point: when run under Python 3.0, tokenize this module's own
# code object and print each token; otherwise report the running version.
if __name__ == "__main__":
    from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str

    if PYTHON_VERSION_TRIPLE[:2] == (3, 0):
        import inspect

        co = inspect.currentframe().f_code  # type: ignore
        # The second value returned by ingest() is not needed for the demo.
        tokens, _ = Scanner30().ingest(co)
        for t in tokens:
            print(t.format())
    else:
        print("Need to be Python 3.0 to demo; I am version %s" % version_tuple_to_str())