scanner2.py 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470
  1. # Copyright (c) 2015-2024 by Rocky Bernstein
  2. # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
  3. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. """
  18. Python 2 Generic bytecode scanner/deparser
  19. This overlaps Python 3's dis module in various ways, but it can be run from
  20. Python versions other than the version running this code. Notably,
  21. run from Python version 2.
  22. Also we *modify* the instruction sequence to assist deparsing code.
  23. For example:
  24. - we add "COME_FROM" instructions to help in figuring out
  25. conditional branching and looping.
  26. - LOAD_CONSTs are classified further into the type of thing
  27. they load:
  28. lambda's, genexpr's, {dict,set,list} comprehension's,
  29. - PARAMETER counts appended {CALL,MAKE}_FUNCTION, BUILD_{TUPLE,SET,SLICE}
  30. Finally we save token information.
  31. """
  32. from __future__ import print_function
  33. from copy import copy
  34. from sys import intern
  35. from xdis import code2num, instruction_size, iscode, op_has_argument
  36. from xdis.bytecode import _get_const_info
  37. from uncompyle6.scanner import Scanner, Token
  38. class Scanner2(Scanner):
  def __init__(self, version, show_asm=None, is_pypy=False):
      """Initialize the scanner and build its opcode classification sets.

      The arguments are passed straight through to ``Scanner.__init__``,
      which (per the note in the original code) is what sets ``self.opc``.
      Everything after that call only derives frozensets of opcode numbers
      from ``self.opc`` plus a couple of defaults.
      """
      Scanner.__init__(self, version, show_asm, is_pypy)

      # self.opc is available only after the base-class initialization above.
      opc = self.opc

      self.pop_jump_if = frozenset((opc.PJIF, opc.PJIT))
      self.jump_forward = frozenset((opc.JUMP_ABSOLUTE, opc.JUMP_FORWARD))

      # "<genexpr>" is the 2.5+ default; before 2.5 the name is
      # "<generator expression>".
      self.genexpr_name = "<genexpr>"

      self.load_asserts = set()

      # Opcodes that can begin a new statement: the SETUP_* block openers
      # (which we will COME_FROM by name), some block/END_ instructions,
      # stores, deletes, prints, and jumps.
      self.statement_opcodes = frozenset(
          (
              opc.SETUP_LOOP,
              opc.BREAK_LOOP,
              opc.SETUP_FINALLY,
              opc.END_FINALLY,
              opc.SETUP_EXCEPT,
              opc.POP_BLOCK,
              opc.STORE_FAST,
              opc.DELETE_FAST,
              opc.STORE_DEREF,
              opc.STORE_GLOBAL,
              opc.DELETE_GLOBAL,
              opc.STORE_NAME,
              opc.DELETE_NAME,
              opc.STORE_ATTR,
              opc.DELETE_ATTR,
              opc.STORE_SUBSCR,
              opc.DELETE_SUBSCR,
              opc.RETURN_VALUE,
              opc.RAISE_VARARGS,
              opc.POP_TOP,
              opc.PRINT_EXPR,
              opc.PRINT_ITEM,
              opc.PRINT_NEWLINE,
              opc.PRINT_ITEM_TO,
              opc.PRINT_NEWLINE_TO,
              opc.CONTINUE_LOOP,
              opc.JUMP_ABSOLUTE,
              opc.EXEC_STMT,
          )
      )

      # Opcodes that can start a "store" non-terminal.
      # FIXME: JUMP_ABSOLUTE is weird. What's up with that?
      self.designator_ops = frozenset(
          (
              opc.STORE_FAST,
              opc.STORE_NAME,
              opc.STORE_GLOBAL,
              opc.STORE_DEREF,
              opc.STORE_ATTR,
              opc.STORE_SLICE_0,
              opc.STORE_SLICE_1,
              opc.STORE_SLICE_2,
              opc.STORE_SLICE_3,
              opc.STORE_SUBSCR,
              opc.UNPACK_SEQUENCE,
              opc.JUMP_ABSOLUTE,
          )
      )

      # Python 2.7 has POP_JUMP_IF_{TRUE,FALSE}_OR_POP but earlier versions
      # do not. Start with an empty set so later processing is uniform.
      self.pop_jump_if_or_pop = frozenset()

      # Opcodes whose operand is a count of stack values. For parsing we
      # generally change the opcode name to include that count.
      self.varargs_ops = frozenset(
          (
              opc.BUILD_LIST,
              opc.BUILD_TUPLE,
              opc.BUILD_SLICE,
              opc.UNPACK_SEQUENCE,
              opc.MAKE_FUNCTION,
              opc.CALL_FUNCTION,
              opc.MAKE_CLOSURE,
              opc.CALL_FUNCTION_VAR,
              opc.CALL_FUNCTION_KW,
              opc.CALL_FUNCTION_VAR_KW,
              opc.DUP_TOPX,
              opc.RAISE_VARARGS,
          )
      )
  124. @staticmethod
  125. def extended_arg_val(arg):
  126. """Return integer value of an EXTENDED_ARG operand.
  127. In Python2 this always the operand value shifted 16 bits since
  128. the operand is always 2 bytes. In Python 3.6+ this changes to one byte.
  129. """
  130. return arg << 16
  131. @staticmethod
  132. def unmangle_name(name, classname):
  133. """Remove __ from the end of _name_ if it starts with __classname__
  134. return the "unmangled" name.
  135. """
  136. if name.startswith(classname) and name[-2:] != "__":
  137. return name[len(classname) - 2 :]
  138. return name
  139. @classmethod
  140. def unmangle_code_names(self, co, classname):
  141. """Remove __ from the end of _name_ if it starts with __classname__
  142. return the "unmangled" name.
  143. """
  144. if classname:
  145. classname = "_" + classname.lstrip("_") + "__"
  146. if hasattr(co, "co_cellvars"):
  147. free = [
  148. self.unmangle_name(name, classname)
  149. for name in (co.co_cellvars + co.co_freevars)
  150. ]
  151. else:
  152. free = ()
  153. names = [self.unmangle_name(name, classname) for name in co.co_names]
  154. varnames = [self.unmangle_name(name, classname) for name in co.co_varnames]
  155. else:
  156. if hasattr(co, "co_cellvars"):
  157. free = co.co_cellvars + co.co_freevars
  158. else:
  159. free = ()
  160. names = co.co_names
  161. varnames = co.co_varnames
  162. return free, names, varnames
  def ingest(self, co, classname=None, code_objects=None, show_asm=None):
      """
      Create "tokens" from the bytecode of a Python code object. Largely
      these are the opcode names, but in some cases the name is modified to
      make parsing easier.

      Some transformations are made to assist the deparsing grammar:
         - various types of LOAD_CONSTs are categorized by what they load
         - COME_FROM instructions are added to assist parsing control
           structures
         - operands with stack argument counts or flag masks are appended to
           the opcode name, e.g.:
            * BUILD_LIST, BUILD_SET
            * MAKE_FUNCTION and function calls append the number of
              positional arguments
         - EXTENDED_ARG instructions are removed

      Also, when we encounter certain tokens, we record them in
      ``customize``, which will cause custom grammar rules. Specifically,
      variable-arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific
      rules for the specific number of arguments they take.

      Parameters:
        co           -- the code object to tokenize.
        classname    -- if truthy, used to unmangle class-private names.
        code_objects -- not referenced by this implementation. The default
                        is None rather than a shared mutable ``{}``.
        show_asm     -- "before", "after" or "both" to print the disassembly
                        and/or the tokenization; if falsy, ``self.show_asm``
                        is used instead.

      Returns a ``(tokens, customize)`` pair: a list of uncompyle6 Tokens and
      a dict of customization entries.

      Side effects visible here: ``self.names`` and ``self.load_asserts`` are
      reassigned.
      """
      if not show_asm:
          show_asm = self.show_asm

      bytecode = self.build_instructions(co)

      if show_asm in ("both", "before"):
          print("\n# ---- disassembly:")
          bytecode.disassemble_bytes(
              co.co_code,
              varnames=co.co_varnames,
              names=co.co_names,
              constants=co.co_consts,
              cells=bytecode._cell_names,
              line_starts=bytecode._linestarts,
              asm_format="extended",
          )

      # list of tokens/instructions
      new_tokens = []

      # "customize" is in the process of going away here
      customize = {}

      if self.is_pypy:
          customize["PyPy"] = 0

      codelen = len(self.code)

      free, names, varnames = self.unmangle_code_names(co, classname)
      self.names = names

      # Scan for assertions. Later we will
      # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
      # 'LOAD_ASSERT' is used in assert statements.
      self.load_asserts = set()
      for i in self.op_range(0, codelen):
          # We need to detect the difference between:
          #   raise AssertionError
          #  and
          #   assert ...
          # Below we use the heuristic that an "assert" is preceded by a
          # POP_JUMP. However we could also use "followed by RAISE_VARARGS",
          # or for PyPy there may be a JUMP_IF_NOT_DEBUG before.
          # FIXME: remove uses of PJIF, and PJIT
          if self.is_pypy:
              have_pop_jump = self.code[i] in (self.opc.PJIF, self.opc.PJIT)
          else:
              have_pop_jump = self.code[i] == self.opc.PJIT

          # The bounds check keeps a conditional jump at the very end of the
          # code from indexing past the bytecode.
          if (
              have_pop_jump
              and i + 3 < codelen
              and self.code[i + 3] == self.opc.LOAD_GLOBAL
          ):
              if names[self.get_argument(i + 3)] == "AssertionError":
                  self.load_asserts.add(i + 3)

      # Get jump targets
      # Format: {target offset: [jump offsets]}
      # self.load_asserts is saved and restored around the call.
      # NOTE(review): presumably find_jump_targets() can modify it; confirm.
      load_asserts_save = copy(self.load_asserts)
      jump_targets = self.find_jump_targets(show_asm)
      self.load_asserts = load_asserts_save

      # Walk statement starts to distinguish "print ..." from "print ...,":
      # a PRINT_ITEM/PRINT_NEWLINE that follows a PRINT_ITEM statement
      # within the same line range is renamed to its *_CONT form.
      last_stmt = self.next_stmt[0]
      i = self.next_stmt[last_stmt]
      replace = {}
      while i < codelen - 1:
          if self.lines[last_stmt].next > i:
              if self.code[last_stmt] == self.opc.PRINT_ITEM:
                  if self.code[i] == self.opc.PRINT_ITEM:
                      replace[i] = "PRINT_ITEM_CONT"
                  elif self.code[i] == self.opc.PRINT_NEWLINE:
                      replace[i] = "PRINT_NEWLINE_CONT"
          last_stmt = i
          i = self.next_stmt[i]

      extended_arg = 0
      for offset in self.op_range(0, codelen):
          if offset in jump_targets:
              jump_idx = 0
              # We want to process COME_FROMs to the same offset to be in
              # *descending* offset order so we have the larger range or
              # biggest instruction interval last. (I think they are sorted
              # in increasing order, but for safety we sort them.) That way,
              # specific COME_FROM tags will match up properly. For example,
              # a "loop" with an "if" nested in it should have the "loop"
              # tag last so the grammar rule matches that properly.
              for jump_offset in sorted(jump_targets[offset], reverse=True):
                  come_from_name = "COME_FROM"
                  op_name = self.opname_for_offset(jump_offset)
                  if op_name.startswith("SETUP_") and self.version[:2] == (2, 7):
                      come_from_type = op_name[len("SETUP_") :]
                      if come_from_type not in ("LOOP", "EXCEPT"):
                          come_from_name = "COME_FROM_%s" % come_from_type
                  new_tokens.append(
                      Token(
                          come_from_name,
                          jump_offset,
                          repr(jump_offset),
                          offset="%s_%d" % (offset, jump_idx),
                          has_arg=True,
                      )
                  )
                  jump_idx += 1

          op = self.code[offset]
          op_name = self.op_name(op)

          oparg = None
          pattr = None
          has_arg = op_has_argument(op, self.opc)
          if has_arg:
              oparg = self.get_argument(offset) + extended_arg
              extended_arg = 0
              if op == self.opc.EXTENDED_ARG:
                  # Fold the value into the next instruction's operand and
                  # emit no token for EXTENDED_ARG itself.
                  extended_arg += self.extended_arg_val(oparg)
                  continue

              # Note: name used to match on rather than op since
              # BUILD_SET isn't in earlier Pythons.
              if op_name in (
                  "BUILD_LIST",
                  "BUILD_SET",
              ):
                  t = Token(
                      op_name,
                      oparg,
                      pattr,
                      offset,
                      self.linestarts.get(offset, None),
                      op,
                      has_arg,
                      self.opc,
                  )

                  collection_type = op_name.split("_")[1]
                  # A non-None result replaces the token list built so far.
                  # NOTE(review): presumably this collapses a run of
                  # constant loads into one CONST_* collection; confirm
                  # against bound_collection_from_tokens().
                  next_tokens = self.bound_collection_from_tokens(
                      new_tokens, t, len(new_tokens), "CONST_%s" % collection_type
                  )
                  if next_tokens is not None:
                      new_tokens = next_tokens
                      continue

              if op in self.opc.CONST_OPS:
                  const = co.co_consts[oparg]
                  if iscode(const):
                      oparg = const
                      if const.co_name == "<lambda>":
                          assert op_name == "LOAD_CONST"
                          op_name = "LOAD_LAMBDA"
                      elif const.co_name == "<genexpr>":
                          op_name = "LOAD_GENEXPR"
                      elif const.co_name == "<dictcomp>":
                          op_name = "LOAD_DICTCOMP"
                      elif const.co_name == "<setcomp>":
                          op_name = "LOAD_SETCOMP"
                      else:
                          op_name = "LOAD_CODE"
                      # verify() uses 'pattr' for comparison, since 'attr'
                      # now holds Code(const) and thus can not be used
                      # for comparison (todo: think about changing this)
                      # pattr = 'code_object @ 0x%x %s->%s' %\
                      # (id(const), const.co_filename, const.co_name)
                      pattr = "<code_object " + const.co_name + ">"
                  else:
                      if oparg < len(co.co_consts):
                          argval, _ = _get_const_info(oparg, co.co_consts)
                      # Why don't we use _ above for "pattr" rather than
                      # "const"? This *is* a little hokey, but we have to
                      # coordinate with other parts like n_LOAD_CONST in
                      # pysource.py for example.
                      pattr = const
              elif op in self.opc.NAME_OPS:
                  pattr = names[oparg]
              elif op in self.opc.JREL_OPS:
                  # use instead: hasattr(self, 'patch_continue'): ?
                  if self.version[:2] == (2, 7):
                      self.patch_continue(new_tokens, offset, op)
                  # The relative operand is added to the offset just past
                  # this 3-byte instruction.
                  pattr = repr(offset + 3 + oparg)
              elif op in self.opc.JABS_OPS:
                  # use instead: hasattr(self, 'patch_continue'): ?
                  if self.version[:2] == (2, 7):
                      self.patch_continue(new_tokens, offset, op)
                  pattr = repr(oparg)
              elif op in self.opc.LOCAL_OPS:
                  pattr = varnames[oparg]
              elif op in self.opc.COMPARE_OPS:
                  pattr = self.opc.cmp_op[oparg]
              elif op in self.opc.FREE_OPS:
                  pattr = free[oparg]

          if op in self.varargs_ops:
              # CE - Hack for >= 2.5
              #      Now all values loaded via LOAD_CLOSURE are packed into
              #      a tuple before calling MAKE_CLOSURE.
              if (
                  op == self.opc.BUILD_TUPLE
                  and self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE
              ):
                  continue
              else:
                  if self.is_pypy and not oparg and op_name == "BUILD_MAP":
                      op_name = "BUILD_MAP_n"
                  else:
                      op_name = "%s_%d" % (op_name, oparg)
                  # FIXME: Figure out why this is needed and remove.
                  customize[op_name] = oparg
          elif op == self.opc.JUMP_ABSOLUTE:
              # Further classify JUMP_ABSOLUTE into backward jumps
              # which are used in loops, and "CONTINUE" jumps which
              # may appear in a "continue" statement. The loop-type
              # and continue-type jumps will help us classify loop
              # boundaries. The continue-type jumps help us get
              # "continue" statements which would otherwise be turned
              # into a "pass" statement because JUMPs are sometimes
              # ignored in rules as just boundary overhead. In
              # comprehensions we might sometimes classify JUMP_BACK
              # as CONTINUE, but that's okay since we add a grammar
              # rule for that.
              target = self.get_target(offset)
              if target <= offset:
                  op_name = "JUMP_BACK"

                  # 'Continue's include jumps to loops that are not at
                  # the end of a block which follow with POP_BLOCK and
                  # COME_FROM_LOOP. If the JUMP_ABSOLUTE is either to a
                  # FOR_ITER or the instruction after a SETUP_LOOP and it
                  # is followed by another JUMP_FORWARD then we'll take it
                  # as a "continue".
                  j = self.offset2inst_index.get(offset)

                  # EXTENDED_ARG doesn't appear in instructions, but is
                  # instead the next opcode folded into it, and has the
                  # offset of the EXTENDED_ARG. Therefore in
                  # self.offset2inst_index we'll find the instruction at
                  # the previous EXTENDED_ARG offset, which is 3 bytes back.
                  if j is None and offset > self.opc.ARG_MAX_VALUE:
                      j = self.offset2inst_index[offset - 3]

                  target_index = self.offset2inst_index[target]
                  is_continue = (
                      self.insts[target_index - 1].opname == "SETUP_LOOP"
                      and self.insts[j + 1].opname == "JUMP_FORWARD"
                  )
                  if is_continue:
                      op_name = "CONTINUE"
                  if offset in self.stmts and self.code[offset + 3] not in (
                      self.opc.END_FINALLY,
                      self.opc.POP_BLOCK,
                  ):
                      if (
                          (
                              offset in self.linestarts
                              and self.code[self.prev[offset]]
                              == self.opc.JUMP_ABSOLUTE
                          )
                          or self.code[target] == self.opc.FOR_ITER
                          or offset not in self.not_continue
                      ):
                          op_name = "CONTINUE"
          elif op == self.opc.LOAD_GLOBAL:
              if offset in self.load_asserts:
                  op_name = "LOAD_ASSERT"
          elif op == self.opc.RETURN_VALUE:
              if offset in self.return_end_ifs:
                  op_name = "RETURN_END_IF"

          linestart = self.linestarts.get(offset, None)

          # A name recorded in `replace` by the print pre-pass above takes
          # precedence over the (possibly rewritten) opcode name.
          if offset not in replace:
              new_tokens.append(
                  Token(
                      op_name, oparg, pattr, offset, linestart, op, has_arg, self.opc
                  )
              )
          else:
              new_tokens.append(
                  Token(
                      replace[offset],
                      oparg,
                      pattr,
                      offset,
                      linestart,
                      op,
                      has_arg,
                      self.opc,
                  )
              )

      if show_asm in ("both", "after"):
          print("\n# ---- tokenization:")
          # FIXME: t.format() is changing tokens!
          for t in new_tokens.copy():
              print(t.format(line_prefix=""))
          print()
      return new_tokens, customize
  def build_statement_indices(self):
      """Work out which offsets start statements and build the lookup table.

      Sets two attributes:
        self.stmts     -- set of offsets judged to begin a statement.
        self.next_stmt -- list with one entry per bytecode offset; entry k
                          is the first kept statement offset greater than
                          k, or ``len(self.code)`` when there is none.
      """
      code = self.code
      opc = self.opc
      code_start = 0
      code_end = len(code)

      # A conditional jump immediately followed by an unconditional jump:
      # the second instruction of such a pair is recorded as a statement.
      jump_pairs = frozenset(
          (cond_jump, jump)
          for cond_jump in (opc.PJIF, opc.PJIT)
          for jump in (opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE)
      )

      prelim = self.all_instr(code_start, code_end, self.statement_opcodes)

      stmts = self.stmts = set(prelim)
      pass_stmts = set()
      for seq in jump_pairs:
          for first in self.op_range(code_start, code_end - (len(seq) + 1)):
              cursor = first
              for expected_op in seq:
                  if code[cursor] != expected_op:
                      break
                  cursor += instruction_size(code[cursor], opc)
              else:
                  # Every opcode matched; cursor is just past the pair, so
                  # step back to the pair's last instruction.
                  last_in_pair = self.prev[cursor]
                  stmts.add(last_in_pair)
                  pass_stmts.add(last_in_pair)

      # Adding offsets above may have disturbed the order, so re-sort.
      stmt_list = sorted(stmts) if pass_stmts else prelim

      last_stmt = -1
      next_stmt = self.next_stmt = []
      filled_to = 0
      for s in stmt_list:
          op = code[s]
          if op == opc.JUMP_ABSOLUTE and s not in pass_stmts:
              target = self.get_target(s)
              same_line = (
                  self.lines and self.lines[last_stmt].l_no == self.lines[s].l_no
              )
              if target > s or same_line:
                  stmts.remove(s)
                  continue
              # Skip back over any run of JUMP_ABSOLUTEs before this one.
              j = self.prev[s]
              while code[j] == opc.JUMP_ABSOLUTE:
                  j = self.prev[j]
              if (
                  self.version >= (2, 3)
                  and self.opname_for_offset(j) == "LIST_APPEND"
              ):
                  # list comprehension
                  stmts.remove(s)
                  continue
          elif op == opc.POP_TOP:
              # The POP_TOP in:
              #   ROT_TWO, POP_TOP,
              #   RETURN_xxx, POP_TOP (in 2.6-), or
              #   JUMP_IF_{FALSE,TRUE}, POP_TOP  (in 2.6-)
              # is part of the previous instruction and not the
              # beginning of a new statement.
              prev_op = code[self.prev[s]]
              if prev_op == opc.ROT_TWO or (
                  self.version < (2, 7)
                  and prev_op
                  in (
                      opc.JUMP_IF_FALSE,
                      opc.JUMP_IF_TRUE,
                      opc.RETURN_VALUE,
                  )
              ):
                  stmts.remove(s)
                  continue
          elif op in self.designator_ops:
              # Skip back over the run of designator ops; if it was fed by
              # a FOR_ITER, this offset does not start a statement.
              j = self.prev[s]
              while code[j] in self.designator_ops:
                  j = self.prev[j]
              if self.version > (2, 1) and code[j] == opc.FOR_ITER:
                  stmts.remove(s)
                  continue

          last_stmt = s
          # Offsets filled_to .. s-1 all have s as their next statement.
          next_stmt += [s] * (s - filled_to)
          filled_to = s

      # Everything from the last kept statement on maps to the end of code.
      next_stmt += [code_end] * (code_end - len(next_stmt))
  def next_except_jump(self, start):
      """
      Return the offset of the next jump that was generated by an
      ``except SomeException:`` construct in a try...except...else clause,
      or None if not found.

      Offsets are also added to ``self.ignore_if`` and ``self.not_continue``
      along the way.
      """
      code = self.code
      opc = self.opc

      if code[start] == opc.DUP_TOP:
          except_match = self.first_instr(start, len(code), opc.PJIF)
          if except_match:
              jmp = self.prev[self.get_target(except_match)]

              # In Python < 2.7 we may have jumps to jumps
              if self.version < (2, 7) and code[jmp] in self.jump_forward:
                  self.not_continue.add(jmp)
                  jmp = self.get_target(jmp)
                  compare_offset = self.prev[except_match]
                  # COMPARE_OP argument should be "exception-match" or 10
                  is_compare = code[compare_offset] == opc.COMPARE_OP
                  if is_compare and code[compare_offset + 1] != 10:
                      return None
                  # NOTE(review): jmp is an offset, while the two sets hold
                  # opcode values (see __init__). Was code[jmp] intended?
                  # Behaviour is left unchanged.
                  if jmp not in self.pop_jump_if | self.jump_forward:
                      self.ignore_if.add(except_match)
                      return None

              self.ignore_if.add(except_match)
              self.not_continue.add(jmp)
              return jmp

      # Otherwise find the END_FINALLY that is not paired with a setup
      # opcode seen since `start`, and return the offset just before it.
      end_finally_count = 0
      setup_count = 0
      for i in self.op_range(start, len(code)):
          op = code[i]
          if op in self.setup_ops and op != opc.END_FINALLY:
              setup_count += 1
              continue
          if op != opc.END_FINALLY:
              continue
          if end_finally_count == setup_count:
              before_end = self.prev[i]
              if self.version[:2] == (2, 7):
                  assert code[before_end] in self.jump_forward | frozenset(
                      [opc.RETURN_VALUE]
                  )
              self.not_continue.add(before_end)
              return before_end
          end_finally_count += 1
  582. def detect_control_flow(self, offset, op, extended_arg):
  583. """
  584. Detect type of block structures and their boundaries to fix optimized jumps
  585. in python2.3+
  586. """
  587. code = self.code
  588. # Detect parent structure
  589. parent = self.structs[0]
  590. start = parent["start"]
  591. end = parent["end"]
  592. next_line_byte = end
  593. # Pick inner-most parent for our offset
  594. for struct in self.structs:
  595. current_start = struct["start"]
  596. current_end = struct["end"]
  597. if (current_start <= offset < current_end) and (
  598. current_start >= start and current_end <= end
  599. ):
  600. start = current_start
  601. end = current_end
  602. parent = struct
  603. if op == self.opc.SETUP_LOOP:
  604. # We categorize loop types: 'for', 'while', 'while 1' with
  605. # possibly suffixes '-loop' and '-else'
  606. # Try to find the jump_back instruction of the loop.
  607. # It could be a return instruction.
  608. inst = self.insts[self.offset2inst_index[offset]]
  609. start += instruction_size(op, self.opc)
  610. setup_target = inst.argval
  611. loop_end_offset = self.restrict_to_parent(setup_target, parent)
  612. self.setup_loop_targets[offset] = setup_target
  613. self.setup_loops[setup_target] = offset
  614. if setup_target != loop_end_offset:
  615. self.fixed_jumps[offset] = loop_end_offset
  616. if self.lines:
  617. (line_no, next_line_byte) = self.lines[offset]
  618. # jump_back_offset is the instruction after the SETUP_LOOP
  619. # where we iterate back to.
  620. jump_back_offset = self.last_instr(
  621. start, loop_end_offset, self.opc.JUMP_ABSOLUTE, next_line_byte, False
  622. )
  623. if jump_back_offset:
  624. # Account for the fact that < 2.7 has an explicit
  625. # POP_TOP instruction in the equivalate POP_JUMP_IF
  626. # construct
  627. if self.version < (2, 7):
  628. jump_forward_offset = jump_back_offset + 4
  629. return_val_offset1 = self.prev[
  630. self.prev[self.prev[loop_end_offset]]
  631. ]
  632. # Is jump back really "back"?
  633. jump_target = self.get_target(
  634. jump_back_offset, code[jump_back_offset]
  635. )
  636. if jump_target > jump_back_offset or code[jump_back_offset + 3] in [
  637. self.opc.JUMP_FORWARD,
  638. self.opc.JUMP_ABSOLUTE,
  639. ]:
  640. jump_back_offset = None
  641. pass
  642. else:
  643. jump_forward_offset = jump_back_offset + 3
  644. return_val_offset1 = self.prev[self.prev[loop_end_offset]]
  645. if (
  646. jump_back_offset
  647. and jump_back_offset != self.prev[loop_end_offset]
  648. and code[jump_forward_offset] in self.jump_forward
  649. ):
  650. if code[self.prev[loop_end_offset]] == self.opc.RETURN_VALUE or (
  651. code[self.prev[loop_end_offset]] == self.opc.POP_BLOCK
  652. and code[return_val_offset1] == self.opc.RETURN_VALUE
  653. ):
  654. jump_back_offset = None
  655. if not jump_back_offset:
  656. # loop suite ends in return
  657. # scanner26 of wbiti had:
  658. # jump_back_offset = self.last_instr(start, loop_end_offset, self.opc.JUMP_ABSOLUTE, start, False)
  659. jump_back_offset = self.last_instr(
  660. start, loop_end_offset, self.opc.RETURN_VALUE
  661. )
  662. if not jump_back_offset:
  663. return
  664. jump_back_offset += 1
  665. if_offset = None
  666. if self.version < (2, 7):
  667. # Look for JUMP_IF POP_TOP ...
  668. if code[self.prev[next_line_byte]] == self.opc.POP_TOP and (
  669. code[self.prev[self.prev[next_line_byte]]] in self.pop_jump_if
  670. ):
  671. if_offset = self.prev[self.prev[next_line_byte]]
  672. elif code[self.prev[next_line_byte]] in self.pop_jump_if:
  673. # Look for POP_JUMP_IF ...
  674. if_offset = self.prev[next_line_byte]
  675. if if_offset:
  676. loop_type = "while"
  677. self.ignore_if.add(if_offset)
  678. if self.version < (2, 7) and (
  679. code[self.prev[jump_back_offset]] == self.opc.RETURN_VALUE
  680. ):
  681. self.ignore_if.add(self.prev[jump_back_offset])
  682. pass
  683. pass
  684. else:
  685. loop_type = "for"
  686. setup_target = next_line_byte
  687. loop_end_offset = jump_back_offset + 3
  688. else:
  689. # We have a loop with a jump-back instruction
  690. if self.get_target(jump_back_offset) >= next_line_byte:
  691. jump_back_offset = self.last_instr(
  692. start, loop_end_offset, self.opc.JUMP_ABSOLUTE, start, False
  693. )
  694. if (
  695. loop_end_offset > jump_back_offset + 4
  696. and code[loop_end_offset] in self.jump_forward
  697. ):
  698. if code[jump_back_offset + 4] in self.jump_forward:
  699. if self.get_target(jump_back_offset + 4) == self.get_target(
  700. loop_end_offset
  701. ):
  702. self.fixed_jumps[offset] = jump_back_offset + 4
  703. loop_end_offset = jump_back_offset + 4
  704. elif setup_target < offset:
  705. self.fixed_jumps[offset] = jump_back_offset + 4
  706. loop_end_offset = jump_back_offset + 4
  707. setup_target = self.get_target(jump_back_offset, self.opc.JUMP_ABSOLUTE)
  708. if self.version > (2, 1) and code[setup_target] in (
  709. self.opc.FOR_ITER,
  710. self.opc.GET_ITER,
  711. ):
  712. loop_type = "for"
  713. else:
  714. loop_type = "while"
  715. # Look for a test condition immediately after the
  716. # SETUP_LOOP while
  717. if (
  718. self.version < (2, 7)
  719. and self.code[self.prev[next_line_byte]] == self.opc.POP_TOP
  720. ):
  721. test_op_offset = self.prev[self.prev[next_line_byte]]
  722. else:
  723. test_op_offset = self.prev[next_line_byte]
  724. if test_op_offset == offset:
  725. loop_type = "while 1"
  726. elif self.code[test_op_offset] in self.opc.JUMP_OPs:
  727. test_target = self.get_target(test_op_offset)
  728. self.ignore_if.add(test_op_offset)
  729. if test_target > (jump_back_offset + 3):
  730. jump_back_offset = test_target
  731. self.not_continue.add(jump_back_offset)
  732. self.loops.append(setup_target)
  733. self.structs.append(
  734. {
  735. "type": loop_type + "-loop",
  736. "start": setup_target,
  737. "end": jump_back_offset,
  738. }
  739. )
  740. if jump_back_offset + 3 != loop_end_offset:
  741. self.structs.append(
  742. {
  743. "type": loop_type + "-else",
  744. "start": jump_back_offset + 3,
  745. "end": loop_end_offset,
  746. }
  747. )
  748. elif op == self.opc.SETUP_EXCEPT:
  749. start = offset + instruction_size(op, self.opc)
  750. target = self.get_target(offset, op)
  751. end_offset = self.restrict_to_parent(target, parent)
  752. if target != end_offset:
  753. self.fixed_jumps[offset] = end_offset
  754. # print target, end, parent
  755. # Add the try block
  756. self.structs.append(
  757. {"type": "try", "start": start - 3, "end": end_offset - 4}
  758. )
  759. # Now isolate the except and else blocks
  760. end_else = start_else = self.get_target(self.prev[end_offset])
  761. end_finally_offset = end_offset
  762. setup_except_nest = 0
  763. while end_finally_offset < len(self.code):
  764. if self.code[end_finally_offset] == self.opc.END_FINALLY:
  765. if setup_except_nest == 0:
  766. break
  767. else:
  768. setup_except_nest -= 1
  769. elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
  770. setup_except_nest += 1
  771. end_finally_offset += instruction_size(
  772. code[end_finally_offset], self.opc
  773. )
  774. pass
  775. # Add the except blocks
  776. i = end_offset
  777. while i < len(self.code) and i < end_finally_offset:
  778. jmp = self.next_except_jump(i)
  779. if jmp is None: # check
  780. i = self.next_stmt[i]
  781. continue
  782. if self.code[jmp] == self.opc.RETURN_VALUE:
  783. self.structs.append({"type": "except", "start": i, "end": jmp + 1})
  784. i = jmp + 1
  785. else:
  786. target = self.get_target(jmp)
  787. if target != start_else:
  788. end_else = self.get_target(jmp)
  789. if self.code[jmp] == self.opc.JUMP_FORWARD:
  790. if self.version <= (2, 6):
  791. self.fixed_jumps[jmp] = target
  792. else:
  793. self.fixed_jumps[jmp] = -1
  794. self.structs.append({"type": "except", "start": i, "end": jmp})
  795. i = jmp + 3
  796. # Add the try-else block
  797. if end_else != start_else:
  798. r_end_else = self.restrict_to_parent(end_else, parent)
  799. # May be able to drop the 2.7 test.
  800. if self.version[:2] == (2, 7):
  801. self.structs.append(
  802. {"type": "try-else", "start": i + 1, "end": r_end_else}
  803. )
  804. self.fixed_jumps[i] = r_end_else
  805. else:
  806. self.fixed_jumps[i] = i + 1
  807. elif op in self.pop_jump_if:
  808. target = self.get_target(offset, op)
  809. rtarget = self.restrict_to_parent(target, parent)
  810. # Do not let jump to go out of parent struct bounds
  811. if target != rtarget and parent["type"] == "and/or":
  812. self.fixed_jumps[offset] = rtarget
  813. return
  814. jump_if_offset = offset
  815. start = offset + 3
  816. pre = self.prev
  817. # Does this jump to right after another conditional jump that is
  818. # not myself? If so, it's part of a larger conditional.
  819. # rocky: if we have a conditional jump to the next instruction, then
  820. # possibly I am "skipping over" a "pass" or null statement.
  821. test_target = target
  822. if self.version < (2, 7):
  823. # Before 2.7 we have to deal with the fact that there is an extra
  824. # POP_TOP that is logically associated with the JUMP_IF's (even though
  825. # the instance set is called "self.pop_jump_if")
  826. if code[pre[test_target]] == self.opc.POP_TOP:
  827. test_target = pre[test_target]
  828. test_set = self.pop_jump_if
  829. else:
  830. test_set = self.pop_jump_if_or_pop | self.pop_jump_if
  831. if code[pre[test_target]] in test_set and target > offset:
  832. # We have POP_JUMP_IF... target
  833. # ...
  834. # pre: POP_JUMP_IF ...
  835. # target: ...
  836. #
  837. # We will take that as either as "and" or "or".
  838. self.fixed_jumps[offset] = pre[target]
  839. self.structs.append(
  840. {"type": "and/or", "start": start, "end": pre[target]}
  841. )
  842. return
  843. # The instruction offset just before the target jump offset is important
  844. # in making a determination of what we have. Save that.
  845. pre_rtarget = pre[rtarget]
  846. # Is it an "and" inside an "if" or "while" block
  847. if op == self.opc.PJIF:
  848. # Search for other POP_JUMP_IF_...'s targeting the
  849. # same target, of the current POP_JUMP_... instruction,
  850. # starting from current offset, and filter everything inside inner 'or'
  851. # jumps and mid-line ifs
  852. match = self.rem_or(
  853. start, self.next_stmt[offset], self.opc.PJIF, target
  854. )
  855. # If we still have any offsets in set, start working on it
  856. if match:
  857. if (
  858. code[pre_rtarget] in self.jump_forward
  859. and pre_rtarget not in self.stmts
  860. and self.restrict_to_parent(
  861. self.get_target(pre_rtarget), parent
  862. )
  863. == rtarget
  864. ):
  865. if (
  866. code[pre[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
  867. and self.remove_mid_line_ifs([offset])
  868. and target == self.get_target(pre[pre_rtarget])
  869. and (
  870. pre[pre_rtarget] not in self.stmts
  871. or self.get_target(pre[pre_rtarget]) > pre[pre_rtarget]
  872. )
  873. and 1
  874. == len(
  875. self.remove_mid_line_ifs(
  876. self.rem_or(
  877. start,
  878. pre[pre_rtarget],
  879. self.pop_jump_if,
  880. target,
  881. )
  882. )
  883. )
  884. ):
  885. pass
  886. elif (
  887. code[pre[pre_rtarget]] == self.opc.RETURN_VALUE
  888. and self.remove_mid_line_ifs([offset])
  889. and 1
  890. == (
  891. len(
  892. set(
  893. self.remove_mid_line_ifs(
  894. self.rem_or(
  895. start,
  896. pre[pre_rtarget],
  897. self.pop_jump_if,
  898. target,
  899. )
  900. )
  901. )
  902. | set(
  903. self.remove_mid_line_ifs(
  904. self.rem_or(
  905. start,
  906. pre[pre_rtarget],
  907. (
  908. self.opc.PJIF,
  909. self.opc.PJIT,
  910. self.opc.JUMP_ABSOLUTE,
  911. ),
  912. pre_rtarget,
  913. True,
  914. )
  915. )
  916. )
  917. )
  918. )
  919. ):
  920. pass
  921. else:
  922. fix = None
  923. jump_ifs = self.all_instr(
  924. start, self.next_stmt[offset], self.opc.PJIF
  925. )
  926. last_jump_good = True
  927. for j in jump_ifs:
  928. if target == self.get_target(j):
  929. if self.lines[j].next == j + 3 and last_jump_good:
  930. fix = j
  931. break
  932. else:
  933. last_jump_good = False
  934. self.fixed_jumps[offset] = fix or match[-1]
  935. return
  936. else:
  937. if self.version < (2, 7) and parent["type"] in (
  938. "root",
  939. "for-loop",
  940. "if-then",
  941. "else",
  942. "try",
  943. ):
  944. self.fixed_jumps[offset] = rtarget
  945. else:
  946. # note test for < 2.7 might be superfluous although informative
  947. # for 2.7 a different branch is taken and the below code is handled
  948. # under: elif op in self.pop_jump_if_or_pop
  949. # below
  950. self.fixed_jumps[offset] = match[-1]
  951. return
  952. else: # op != self.opc.PJIT
  953. if self.version < (2, 7) and code[offset + 3] == self.opc.POP_TOP:
  954. assert_offset = offset + 4
  955. else:
  956. assert_offset = offset + 3
  957. if (assert_offset) in self.load_asserts:
  958. if code[pre_rtarget] == self.opc.RAISE_VARARGS:
  959. return
  960. self.load_asserts.remove(assert_offset)
  961. next = self.next_stmt[offset]
  962. if pre[next] == offset:
  963. pass
  964. elif code[next] in self.jump_forward and target == self.get_target(
  965. next
  966. ):
  967. if code[pre[next]] == self.opc.PJIF:
  968. if (
  969. code[next] == self.opc.JUMP_FORWARD
  970. or target != rtarget
  971. or code[pre[pre_rtarget]]
  972. not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
  973. ):
  974. self.fixed_jumps[offset] = pre[next]
  975. return
  976. elif (
  977. code[next] == self.opc.JUMP_ABSOLUTE
  978. and code[target] in self.jump_forward
  979. ):
  980. next_target = self.get_target(next)
  981. if self.get_target(target) == next_target:
  982. self.fixed_jumps[offset] = pre[next]
  983. return
  984. elif code[next_target] in self.jump_forward and self.get_target(
  985. next_target
  986. ) == self.get_target(target):
  987. self.fixed_jumps[offset] = pre[next]
  988. return
  989. # don't add a struct for a while test, it's already taken care of
  990. if offset in self.ignore_if:
  991. return
  992. if self.version == (2, 7):
  993. if (
  994. code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
  995. and pre_rtarget in self.stmts
  996. and pre_rtarget != offset
  997. and pre[pre_rtarget] != offset
  998. ):
  999. if (
  1000. code[rtarget] == self.opc.JUMP_ABSOLUTE
  1001. and code[rtarget + 3] == self.opc.POP_BLOCK
  1002. ):
  1003. if code[pre[pre_rtarget]] != self.opc.JUMP_ABSOLUTE:
  1004. pass
  1005. elif self.get_target(pre[pre_rtarget]) != target:
  1006. pass
  1007. else:
  1008. rtarget = pre_rtarget
  1009. else:
  1010. rtarget = pre_rtarget
  1011. pre_rtarget = pre[rtarget]
  1012. # Does the "jump if" jump beyond a jump op?
  1013. # That is, we have something like:
  1014. # POP_JUMP_IF_FALSE HERE
  1015. # ...
  1016. # JUMP_FORWARD
  1017. # HERE:
  1018. #
  1019. # If so, this can be a block inside an "if" statement
  1020. # or a conditional assignment like:
  1021. # x = 1 if x else 2
  1022. #
  1023. # There are other situations we may need to consider, like
  1024. # if the condition jump is to a forward location.
  1025. # Also the existence of a jump to the instruction after "END_FINALLY"
  1026. # will distinguish "try/else" from "try".
  1027. code_pre_rtarget = code[pre_rtarget]
  1028. if code_pre_rtarget in self.jump_forward:
  1029. if_end = self.get_target(pre_rtarget)
  1030. # Is this a loop and not an "if" statement?
  1031. if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):
  1032. if if_end > start:
  1033. return
  1034. else:
  1035. # We still have the case in 2.7 that the next instruction
  1036. # is a jump to a SETUP_LOOP target.
  1037. next_offset = target + instruction_size(
  1038. self.code[target], self.opc
  1039. )
  1040. next_op = self.code[next_offset]
  1041. if self.op_name(next_op) == "JUMP_FORWARD":
  1042. jump_target = self.get_target(next_offset, next_op)
  1043. if jump_target in self.setup_loops:
  1044. self.structs.append(
  1045. {
  1046. "type": "while-loop",
  1047. "start": jump_if_offset,
  1048. "end": jump_target,
  1049. }
  1050. )
  1051. self.fixed_jumps[jump_if_offset] = jump_target
  1052. return
  1053. end_offset = self.restrict_to_parent(if_end, parent)
  1054. if_then_maybe = None
  1055. if (2, 2) <= self.version <= (2, 6):
  1056. # Take the JUMP_IF target. In an "if/then", it will be
  1057. # a POP_TOP instruction and the instruction before it
  1058. # will be a JUMP_FORWARD to just after the POP_TOP.
  1059. # For example:
  1060. # Good:
  1061. # 3 JUMP_IF_FALSE 33 'to 39'
  1062. # ..
  1063. # 36 JUMP_FORWARD 1 'to 40'
  1064. # 39 POP_TOP
  1065. # 40 ...
  1066. # example:
  1067. # BAD (is an "and"):
  1068. # 28 JUMP_IF_FALSE 4 'to 35'
  1069. # ...
  1070. # 32 JUMP_ABSOLUTE 40 'to 40' # should be 36 or there should
  1071. # # be a COME_FROM at the pop top
  1072. # # before 40 to 35
  1073. # 35 POP_TOP
  1074. # 36 ...
  1075. # 39 POP_TOP
  1076. # 39_0 COME_FROM 3
  1077. # 40 ...
  1078. if self.opname_for_offset(jump_if_offset).startswith("JUMP_IF"):
  1079. jump_if_target = code[jump_if_offset + 1]
  1080. if (
  1081. self.opname_for_offset(jump_if_target + jump_if_offset + 3)
  1082. == "POP_TOP"
  1083. ):
  1084. jump_inst = jump_if_target + jump_if_offset
  1085. jump_offset = code[jump_inst + 1]
  1086. jump_op = self.opname_for_offset(jump_inst)
  1087. if jump_op == "JUMP_FORWARD" and jump_offset == 1:
  1088. self.structs.append(
  1089. {
  1090. "type": "if-then",
  1091. "start": start - 3,
  1092. "end": pre_rtarget,
  1093. }
  1094. )
  1095. self.thens[start] = end_offset
  1096. elif jump_op == "JUMP_ABSOLUTE":
  1097. if_then_maybe = {
  1098. "type": "if-then",
  1099. "start": start - 3,
  1100. "end": pre_rtarget,
  1101. }
  1102. elif self.version[:2] == (2, 7):
  1103. self.structs.append(
  1104. {"type": "if-then", "start": start - 3, "end": pre_rtarget}
  1105. )
  1106. # FIXME: this is yet another case were we need dominators.
  1107. if pre_rtarget not in self.linestarts or self.version < (2, 7):
  1108. self.not_continue.add(pre_rtarget)
  1109. if rtarget < end_offset:
  1110. # We have an "else" block of some kind.
  1111. # Is it associated with "if_then_maybe" seen above?
  1112. # These will be linked in this funny way:
  1113. # 198 JUMP_IF_FALSE 18 'to 219'
  1114. # 201 POP_TOP
  1115. # ...
  1116. # 216 JUMP_ABSOLUTE 256 'to 256'
  1117. # 219 POP_TOP
  1118. # ...
  1119. # 252 JUMP_FORWARD 1 'to 256'
  1120. # 255 POP_TOP
  1121. # 256
  1122. if if_then_maybe and jump_op == "JUMP_ABSOLUTE":
  1123. jump_target = self.get_target(jump_inst, code[jump_inst])
  1124. if self.opname_for_offset(end_offset) == "JUMP_FORWARD":
  1125. end_target = self.get_target(end_offset, code[end_offset])
  1126. if jump_target == end_target:
  1127. self.structs.append(if_then_maybe)
  1128. self.thens[start] = end_offset
  1129. self.structs.append(
  1130. {"type": "else", "start": rtarget, "end": end_offset}
  1131. )
  1132. elif code_pre_rtarget == self.opc.RETURN_VALUE:
  1133. if self.version[:2] == (2, 7) or pre_rtarget not in self.ignore_if:
  1134. # Below, 10 is exception-match. If there is an exception
  1135. # match in the compare, then this is an exception
  1136. # clause not an if-then clause
  1137. if (
  1138. self.code[self.prev[offset]] != self.opc.COMPARE_OP
  1139. or self.code[self.prev[offset] + 1] != 10
  1140. ):
  1141. self.structs.append(
  1142. {"type": "if-then", "start": start, "end": rtarget}
  1143. )
  1144. self.thens[start] = rtarget
  1145. if (
  1146. self.version[:2] == (2, 7)
  1147. or code[pre_rtarget + 1] != self.opc.JUMP_FORWARD
  1148. ):
  1149. # The below is a big hack until we get
  1150. # better control flow analysis: disallow
  1151. # END_IF if the instruction before the
  1152. # END_IF instruction happens to be a jump
  1153. # target. In this case, probably what's
  1154. # gone on is that we messed up on the
  1155. # END_IF location and it should be the
  1156. # instruction before.
  1157. self.fixed_jumps[offset] = rtarget
  1158. if (
  1159. self.version[:2] == (2, 7)
  1160. and self.insts[
  1161. self.offset2inst_index[pre[pre_rtarget]]
  1162. ].is_jump_target
  1163. ):
  1164. self.return_end_ifs.add(pre[pre_rtarget])
  1165. pass
  1166. else:
  1167. self.return_end_ifs.add(pre_rtarget)
  1168. pass
  1169. pass
  1170. pass
  1171. elif op in self.pop_jump_if_or_pop:
  1172. target = self.get_target(offset, op)
  1173. self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
  def find_jump_targets(self, debug):
      """
      Detect all offsets in a byte code which are jump targets
      where we might insert a pseudo "COME_FROM" instruction.

      "COME_FROM" instructions are used in detecting overall
      control flow. The more detailed information about the
      control flow is captured in self.structs.

      Since this stuff is tricky, consult self.structs when
      something goes amiss.

      Calling this resets the per-code-object bookkeeping (self.structs,
      self.loops, self.fixed_jumps, self.ignore_if, self.not_continue,
      self.return_end_ifs, self.setup_loop_targets, self.setup_loops and
      self.thens) before scanning.

      debug: when it is "both" or "after", the computed targets are
      printed and self.structs is pretty-printed before returning.

      Return a dictionary that maps each jump-target offset to the list
      of offsets it is jumped to from. An instruction can be jumped
      to from multiple instructions.
      """
      code = self.code
      n = len(code)
      self.structs = [{"type": "root", "start": 0, "end": n - 1}]

      # All loop entry points
      self.loops = []

      # Map fixed jumps to their real destination
      self.fixed_jumps = {}
      self.ignore_if = set()
      self.build_statement_indices()

      # Containers filled by detect_control_flow()
      self.not_continue = set()
      self.return_end_ifs = set()
      self.setup_loop_targets = {}  # target given setup_loop offset
      self.setup_loops = {}  # setup_loop offset given target
      self.thens = {}  # JUMP_IF's that separate the 'then' part of an 'if'

      # Loop invariants, looked up once instead of once per instruction.
      opc = self.opc
      is_27 = self.version[:2] == (2, 7)

      targets = {}
      extended_arg = 0
      for offset in self.op_range(0, n):
          op = code[offset]

          if op == opc.EXTENDED_ARG:
              # Accumulate the extended part; it is handed to
              # detect_control_flow() for the instruction that follows.
              arg = code2num(code, offset + 1) | extended_arg
              extended_arg += self.extended_arg_val(arg)
              continue

          # Determine structures and fix jumps in Python versions
          # since 2.3
          self.detect_control_flow(offset, op, extended_arg)

          if op_has_argument(op, opc):
              # A jump that detect_control_flow() redirected takes precedence
              # over the destination encoded in the instruction itself.
              label = self.fixed_jumps.get(offset)
              oparg = self.get_argument(offset)

              if label is None:
                  if op in opc.JREL_OPS and self.op_name(op) != "FOR_ITER":
                      # Relative jump: the argument is counted from
                      # offset + 3, i.e. just past this instruction.
                      label = offset + 3 + oparg
                  elif (
                      is_27
                      and op in opc.JABS_OPS
                      and op in (opc.JUMP_IF_FALSE_OR_POP, opc.JUMP_IF_TRUE_OR_POP)
                      and oparg > offset
                  ):
                      # Only forward JUMP_IF_..._OR_POP jumps are recorded.
                      label = oparg

              # FIXME FIXME FIXME
              # All the conditions are horrible, and I am not sure I
              # understand fully what's going on.
              # We REALLY REALLY need a better way to handle control flow
              # Especially for < 2.7
              if label is not None and label != -1:
                  if is_27:
                      # FIXME: rocky: I think we need something like this...
                      # A jump to a recorded loop target is attributed to the
                      # SETUP_LOOP offset stored for that target.
                      source = self.setup_loops.get(label, offset)
                      targets.setdefault(label, []).append(source)
                  elif not (
                      code[label] == opc.POP_TOP
                      and code[self.prev[label]] == opc.RETURN_VALUE
                  ):
                      # In Python < 2.7, don't add a COME_FROM, for:
                      #     ~RETURN_VALUE POP_TOP .. END_FINALLY
                      # or:
                      #     ~RETURN_VALUE POP_TOP .. POP_TOP END_FINALLY
                      before_end_finally = code[offset + 3] == opc.END_FINALLY or (
                          code[offset + 3] == opc.POP_TOP
                          and code[offset + 4] == opc.END_FINALLY
                      )

                      # JUMP_FORWARD is exempt from the skip: this is for
                      # special try/else handling.
                      skip_come_from = before_end_finally and op != opc.JUMP_FORWARD

                      # FIXME: rocky: I think we need something like this...
                      # self.ignore_if is already a set, so test it directly.
                      if not skip_come_from and offset not in self.ignore_if:
                          source = self.setup_loops.get(label, offset)
                          # FIXME: The grammar for 2.6 and before doesn't
                          # handle COME_FROM's from a loop inside if's
                          # It probably should.
                          if (
                              self.version > (2, 6)
                              or self.code[source] != opc.SETUP_LOOP
                              or self.code[label] != opc.JUMP_FORWARD
                          ):
                              targets.setdefault(label, []).append(source)
          elif op == opc.END_FINALLY and offset in self.fixed_jumps and is_27:
              label = self.fixed_jumps[offset]
              targets.setdefault(label, []).append(offset)

          # An EXTENDED_ARG prefix only applies to the instruction after it.
          extended_arg = 0

      # DEBUG:
      if debug in ("both", "after"):
          import pprint as pp

          print(targets)
          pp.pprint(self.structs)

      return targets
  def patch_continue(self, tokens, offset, op):
      """
      Relabel the most recent token as "CONTINUE" when it is really a
      "continue" statement that was tokenized as "JUMP_BACK".

      tokens: the tokens produced so far; the last one may have its
              `kind` rewritten in place.
      offset: offset in self.code of the instruction being examined.
      op:     opcode found at that offset.

      Nothing is returned.
      """
      if op not in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE):
          return

      # FIXME: this is a hack to catch stuff like:
      #   for ...
      #     try: ...
      #     except: continue
      # the "continue" is not on a new line.
      if len(tokens) <= 2:
          return

      last_token = tokens[-1]
      if last_token.kind != "JUMP_BACK":
          return

      # Only rewrite when the opcode at offset + 3 is END_FINALLY.
      if self.code[offset + 3] == self.opc.END_FINALLY:
          last_token.kind = intern("CONTINUE")
  1307. # FIXME: combine with scanner3.py code and put into scanner.py
  def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
      """
      Find all <instr> in the block from start to end.

      <instr> is any python bytecode instruction or a collection of opcodes.
      If <instr> is an opcode with a target (like a jump), a target
      destination can be specified which must match precisely; when
      include_beyond_target is true, a destination at or past <target>
      also matches.

      The matches are then filtered against every PJIT instruction in the
      block: for each PJIT, only matches at or before the PJIT, or at or
      after (the PJIT's target - 3), are kept.

      Return a list with indexes to them or [] if none found.
      """
      assert start >= 0 and end <= len(self.code) and start <= end

      # Accept a single opcode as well as a collection of opcodes. Probing
      # a non-container with "in" raises TypeError; anything else is a real
      # error and must not be swallowed.
      try:
          None in instr
      except TypeError:
          instr = [instr]

      instr_offsets = []
      for i in self.op_range(start, end):
          op = self.code[i]
          if op not in instr:
              continue
          if target is None:
              instr_offsets.append(i)
              continue
          t = self.get_target(i, op)
          if t == target or (include_beyond_target and t >= target):
              instr_offsets.append(i)

      # Drop the matches that lie strictly between a PJIT and
      # (that PJIT's target - 3).
      for pjit in self.all_instr(start, end, self.opc.PJIT):
          tgt = self.get_target(pjit) - 3
          instr_offsets = [i for i in instr_offsets if i <= pjit or i >= tgt]
      return instr_offsets