scanner3.py 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579
  1. # Copyright (c) 2015-2019, 2021-2024 by Rocky Bernstein
  2. # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
  3. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. """
  18. Python 3 Generic bytecode scanner/deparser
  19. This overlaps various Python3's dis module, but it can be run from
  20. Python versions other than the version running this code. Notably,
  21. run from Python version 2.
  22. Also we *modify* the instruction sequence to assist deparsing code.
  23. For example:
  24. - we add "COME_FROM" instructions to help in figuring out
  25. conditional branching and looping.
  26. - LOAD_CONSTs are classified further into the type of thing
  27. they load:
  28. lambda's, genexpr's, {dict,set,list} comprehension's,
  29. - PARAMETER counts appended {CALL,MAKE}_FUNCTION, BUILD_{TUPLE,SET,SLICE}
  30. Finally we save token information.
  31. """
  32. from __future__ import print_function
  33. import sys
  34. from typing import Optional, Tuple
  35. import xdis
  36. # Get all the opcodes into globals
  37. import xdis.opcodes.opcode_33 as op3
  38. from xdis import Instruction, instruction_size, iscode
  39. from xdis.bytecode import _get_const_info
  40. from xdis.opcodes.opcode_3x import parse_fn_counts_30_35
  41. from uncompyle6.scanner import CONST_COLLECTIONS, Scanner
  42. from uncompyle6.scanners.tok import Token
  43. from uncompyle6.util import get_code_name
# Module-level alias for sys.intern; ingest() uses it when it rewrites the
# kind of a token that has already been emitted.
intern = sys.intern

# Copy every entry of op3.opmap (the xdis Python 3.3 opcode module imported
# above) into this module's global namespace.
# presumably opmap maps opcode names to opcode numbers -- TODO confirm in xdis
globals().update(op3.opmap)
  46. class Scanner3(Scanner):
  47. def __init__(self, version, show_asm=None, is_pypy=False):
  48. super(Scanner3, self).__init__(version, show_asm, is_pypy)
  49. # Create opcode classification sets
  50. # Note: super initialization above initializes self.opc
  51. # For ops that start SETUP_ ... we will add COME_FROM with these names
  52. # at the their targets.
  53. # Some blocks and END_ statements. And they can start
  54. # a new statement
  55. if self.version < (3, 8):
  56. setup_ops = [
  57. self.opc.SETUP_LOOP,
  58. self.opc.SETUP_EXCEPT,
  59. self.opc.SETUP_FINALLY,
  60. ]
  61. self.setup_ops_no_loop = frozenset(setup_ops) - frozenset(
  62. [self.opc.SETUP_LOOP]
  63. )
  64. else:
  65. setup_ops = [self.opc.SETUP_FINALLY]
  66. self.setup_ops_no_loop = frozenset(setup_ops)
  67. if self.version >= (3, 2):
  68. setup_ops.append(self.opc.SETUP_WITH)
  69. self.setup_ops = frozenset(setup_ops)
  70. if self.version[:2] == (3, 0):
  71. self.pop_jump_tf = frozenset(
  72. [self.opc.JUMP_IF_FALSE, self.opc.JUMP_IF_TRUE]
  73. )
  74. self.not_continue_follow = ("END_FINALLY", "POP_BLOCK", "POP_TOP")
  75. else:
  76. self.pop_jump_tf = frozenset([self.opc.PJIF, self.opc.PJIT])
  77. self.not_continue_follow = ("END_FINALLY", "POP_BLOCK")
  78. # Opcodes that can start a statement.
  79. statement_opcodes = [
  80. self.opc.POP_BLOCK,
  81. self.opc.STORE_FAST,
  82. self.opc.DELETE_FAST,
  83. self.opc.STORE_DEREF,
  84. self.opc.STORE_GLOBAL,
  85. self.opc.DELETE_GLOBAL,
  86. self.opc.STORE_NAME,
  87. self.opc.DELETE_NAME,
  88. self.opc.STORE_ATTR,
  89. self.opc.DELETE_ATTR,
  90. self.opc.STORE_SUBSCR,
  91. self.opc.POP_TOP,
  92. self.opc.DELETE_SUBSCR,
  93. self.opc.END_FINALLY,
  94. self.opc.RETURN_VALUE,
  95. self.opc.RAISE_VARARGS,
  96. self.opc.PRINT_EXPR,
  97. self.opc.JUMP_ABSOLUTE,
  98. ]
  99. if self.version < (3, 8):
  100. statement_opcodes += [self.opc.BREAK_LOOP, self.opc.CONTINUE_LOOP]
  101. self.statement_opcodes = frozenset(statement_opcodes) | self.setup_ops_no_loop
  102. # Opcodes that can start a "store" non-terminal.
  103. # FIXME: JUMP_ABSOLUTE is weird. What's up with that?
  104. self.designator_ops = frozenset(
  105. [
  106. self.opc.STORE_FAST,
  107. self.opc.STORE_NAME,
  108. self.opc.STORE_GLOBAL,
  109. self.opc.STORE_DEREF,
  110. self.opc.STORE_ATTR,
  111. self.opc.STORE_SUBSCR,
  112. self.opc.UNPACK_SEQUENCE,
  113. self.opc.JUMP_ABSOLUTE,
  114. self.opc.UNPACK_EX,
  115. ]
  116. )
  117. if self.version > (3, 0):
  118. self.jump_if_pop = frozenset(
  119. [self.opc.JUMP_IF_FALSE_OR_POP, self.opc.JUMP_IF_TRUE_OR_POP]
  120. )
  121. self.pop_jump_if_pop = frozenset(
  122. [
  123. self.opc.JUMP_IF_FALSE_OR_POP,
  124. self.opc.JUMP_IF_TRUE_OR_POP,
  125. self.opc.POP_JUMP_IF_TRUE,
  126. self.opc.POP_JUMP_IF_FALSE,
  127. ]
  128. )
  129. # Not really a set, but still clasification-like
  130. self.statement_opcode_sequences = [
  131. (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD),
  132. (self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_ABSOLUTE),
  133. (self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_FORWARD),
  134. (self.opc.POP_JUMP_IF_TRUE, self.opc.JUMP_ABSOLUTE),
  135. ]
  136. else:
  137. self.jump_if_pop = frozenset([])
  138. self.pop_jump_if_pop = frozenset([])
  139. # Not really a set, but still clasification-like
  140. self.statement_opcode_sequences = [
  141. (self.opc.JUMP_FORWARD,),
  142. (self.opc.JUMP_ABSOLUTE,),
  143. (self.opc.JUMP_FORWARD,),
  144. (self.opc.JUMP_ABSOLUTE,),
  145. ]
  146. # FIXME: remove this and use instead info from xdis.
  147. # Opcodes that take a variable number of arguments
  148. # (expr's)
  149. varargs_ops = set(
  150. [
  151. self.opc.BUILD_LIST,
  152. self.opc.BUILD_TUPLE,
  153. self.opc.BUILD_SET,
  154. self.opc.BUILD_SLICE,
  155. self.opc.BUILD_MAP,
  156. self.opc.UNPACK_SEQUENCE,
  157. self.opc.RAISE_VARARGS,
  158. ]
  159. )
  160. if is_pypy or self.version >= (3, 7):
  161. varargs_ops.add(self.opc.CALL_METHOD)
  162. if self.version >= (3, 5):
  163. varargs_ops |= set(
  164. [
  165. self.opc.BUILD_SET_UNPACK,
  166. self.opc.BUILD_MAP_UNPACK, # we will handle this later
  167. self.opc.BUILD_LIST_UNPACK,
  168. self.opc.BUILD_TUPLE_UNPACK,
  169. ]
  170. )
  171. if self.version >= (3, 6):
  172. varargs_ops.add(self.opc.BUILD_CONST_KEY_MAP)
  173. # Below is in bit order, "default = bit 0, closure = bit 3
  174. self.MAKE_FUNCTION_FLAGS = tuple(
  175. """
  176. default keyword-only annotation closure""".split()
  177. )
  178. self.varargs_ops = frozenset(varargs_ops)
  179. # FIXME: remove the above in favor of:
  180. # self.varargs_ops = frozenset(self.opc.hasvargs)
  181. return
  182. def bound_collection_from_inst(
  183. self,
  184. insts: list,
  185. next_tokens: list,
  186. inst: Instruction,
  187. t: Token,
  188. i: int,
  189. collection_type: str,
  190. ) -> Optional[list]:
  191. """
  192. Try to replace a sequence of instruction that ends with a
  193. BUILD_xxx with a sequence that can be parsed much faster, but
  194. inserting the token boundary at the beginning of the sequence.
  195. """
  196. count = t.attr
  197. assert isinstance(count, int)
  198. assert count <= i
  199. if collection_type == "CONST_DICT":
  200. # constant dictionaries work via BUILD_CONST_KEY_MAP and
  201. # handle the values() like sets and lists.
  202. # However the keys() are an LOAD_CONST of the keys.
  203. # adjust offset to account for this
  204. count += 1
  205. # For small lists don't bother
  206. if count < 5:
  207. return None
  208. collection_start = i - count
  209. for j in range(collection_start, i):
  210. if insts[j].opname not in (
  211. "LOAD_ASSERT",
  212. "LOAD_CODE",
  213. "LOAD_CONST",
  214. "LOAD_FAST",
  215. "LOAD_GLOBAL",
  216. "LOAD_NAME",
  217. "LOAD_STR",
  218. ):
  219. return None
  220. collection_enum = CONST_COLLECTIONS.index(collection_type)
  221. # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
  222. # add a boundary marker and change LOAD_CONST to something else
  223. new_tokens = next_tokens[:-count]
  224. start_offset = insts[collection_start].offset
  225. new_tokens.append(
  226. Token(
  227. opname="COLLECTION_START",
  228. attr=collection_enum,
  229. pattr=collection_type,
  230. offset=f"{start_offset}_0",
  231. linestart=False,
  232. has_arg=True,
  233. has_extended_arg=False,
  234. opc=self.opc,
  235. optype="pseudo",
  236. )
  237. )
  238. for j in range(collection_start, i):
  239. new_tokens.append(
  240. Token(
  241. opname="ADD_VALUE",
  242. attr=insts[j].argval,
  243. pattr=insts[j].argrepr,
  244. offset=insts[j].offset,
  245. linestart=insts[j].starts_line,
  246. has_arg=True,
  247. has_extended_arg=False,
  248. opc=self.opc,
  249. optype=insts[j].optype,
  250. )
  251. )
  252. new_tokens.append(
  253. Token(
  254. opname=f"BUILD_{collection_type}",
  255. attr=t.attr,
  256. pattr=t.pattr,
  257. offset=t.offset,
  258. linestart=t.linestart,
  259. has_arg=t.has_arg,
  260. has_extended_arg=False,
  261. opc=t.opc,
  262. optype="pseudo",
  263. )
  264. )
  265. return new_tokens
  266. # Move to scanner35?
  267. def bound_map_from_inst_35(
  268. self, insts: list, next_tokens: list, t: Token, i: int
  269. ) -> Optional[list]:
  270. """
  271. Try to a sequence of instruction that ends with a BUILD_MAP into
  272. a sequence that can be parsed much faster, but inserting the
  273. token boundary at the beginning of the sequence.
  274. """
  275. count = t.attr
  276. assert isinstance(count, int)
  277. if count > i:
  278. return None
  279. # For small lists don't bother
  280. if count < 5:
  281. return None
  282. # Newer Python BUILD_MAP argument's count is a
  283. # key and value pair so it is multiplied by two.
  284. collection_start = i - (count * 2)
  285. assert (count * 2) <= i
  286. for j in range(collection_start, i, 2):
  287. if insts[j].opname not in ("LOAD_CONST",):
  288. return None
  289. if insts[j + 1].opname not in ("LOAD_CONST",):
  290. return None
  291. collection_start = i - (2 * count)
  292. collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
  293. # If we get here, all instructions before tokens[i] are LOAD_CONST and
  294. # we can replace add a boundary marker and change LOAD_CONST to
  295. # something else.
  296. new_tokens = next_tokens[: -(2 * count)]
  297. start_offset = insts[collection_start].offset
  298. new_tokens.append(
  299. Token(
  300. opname="COLLECTION_START",
  301. attr=collection_enum,
  302. pattr="CONST_MAP",
  303. offset=f"{start_offset}_0",
  304. linestart=insts[collection_start].starts_line,
  305. has_arg=True,
  306. has_extended_arg=False,
  307. opc=self.opc,
  308. optype="pseudo",
  309. )
  310. )
  311. for j in range(collection_start, i, 2):
  312. new_tokens.append(
  313. Token(
  314. opname="ADD_KEY",
  315. attr=insts[j].argval,
  316. pattr=insts[j].argrepr,
  317. offset=insts[j].offset,
  318. linestart=insts[j].starts_line,
  319. has_arg=True,
  320. has_extended_arg=False,
  321. opc=self.opc,
  322. optype="pseudo",
  323. )
  324. )
  325. new_tokens.append(
  326. Token(
  327. opname="ADD_VALUE",
  328. attr=insts[j + 1].argval,
  329. pattr=insts[j + 1].argrepr,
  330. offset=insts[j + 1].offset,
  331. linestart=insts[j + 1].starts_line,
  332. has_arg=True,
  333. has_extended_arg=False,
  334. opc=self.opc,
  335. optype="pseudo",
  336. )
  337. )
  338. new_tokens.append(
  339. Token(
  340. opname="BUILD_DICT_OLDER",
  341. attr=t.attr,
  342. pattr=t.pattr,
  343. offset=t.offset,
  344. linestart=t.linestart,
  345. has_arg=t.has_arg,
  346. has_extended_arg=False,
  347. opc=t.opc,
  348. optype="pseudo",
  349. )
  350. )
  351. return new_tokens
    def ingest(
        self, co, classname=None, code_objects={}, show_asm=None
    ) -> Tuple[list, dict]:
        """
        Create "tokens" from the bytecode of a Python code object. Largely
        these are the opcode name, but in some cases that has been modified
        to make parsing easier.

        Some transformations are made to assist the deparsing grammar:
           -  various types of LOAD_CONST's are categorized in terms of what
              they load
           -  COME_FROM instructions are added to assist parsing control
              structures
           -  operands with stack argument counts or flag masks are appended
              to the opcode name, e.g.:
              *  BUILD_LIST, BUILD_SET
              *  MAKE_FUNCTION and FUNCTION_CALLS append the number of
                 positional arguments
           -  EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set
        which will cause custom grammar rules. Specifically, variable
        arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
        for the specific number of arguments they take.

        :param co: the code object to tokenize; its co_code, co_varnames,
            co_names and co_consts are read here.
        :param classname: not used in this method.
        :param code_objects: not used in this method.
            NOTE(review): the default is a shared mutable dict; harmless
            while it stays unused.
        :param show_asm: when falsy, self.show_asm is used instead.
            "before"/"both" print the disassembly, "after"/"both" print
            the resulting tokens. It is also passed on to
            find_jump_targets().
        :return: a tuple ``(tokens, customize)``: the list of uncompyle6
            Token's and the ``customize`` dict built along the way.

        Side effect: ``self.load_asserts`` is reset and refilled.
        """

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ("both", "before"):
            print("\n# ---- disassembly:")
            bytecode.disassemble_bytes(
                co.co_code,
                varnames=co.co_varnames,
                names=co.co_names,
                constants=co.co_consts,
                cells=bytecode._cell_names,
                line_starts=bytecode._linestarts,
                asm_format="extended",
            )

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize["PyPy"] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        n = len(self.insts)
        for i, inst in enumerate(self.insts):
            opname = inst.opname
            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if self.version[:2] == (3, 0):
                # Like 2.6, 3.0 doesn't have POP_JUMP_IF... so we have
                # to go through more machinations
                assert_can_follow = opname == "POP_TOP" and i + 1 < n
                if assert_can_follow:
                    # The conditional jump is the instruction just before
                    # the POP_TOP.
                    prev_inst = self.insts[i - 1]
                    assert_can_follow = (
                        prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE")
                        and i + 1 < n
                    )
                    jump_if_inst = prev_inst
            else:
                assert_can_follow = (
                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n
                )
                jump_if_inst = inst
            if assert_can_follow:
                next_inst = self.insts[i + 1]
                if (
                    next_inst.opname == "LOAD_GLOBAL"
                    and next_inst.argval == "AssertionError"
                    and jump_if_inst.argval
                ):
                    # Look at the instruction just before the jump target:
                    # if it is a RAISE_VARARGS, record the LOAD_GLOBAL's
                    # offset so it becomes LOAD_ASSERT in the loop below.
                    raise_idx = self.offset2inst_index[
                        self.prev_op[jump_if_inst.argval]
                    ]
                    raise_inst = self.insts[raise_idx]
                    if raise_inst.opname.startswith("RAISE_VARARGS"):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False
        new_tokens = []

        # NOTE(review): nothing in this method ever assigns a non-None
        # value to skip_end_offset, so the guard at the top of the loop
        # below never fires here.
        skip_end_offset = None

        for i, inst in enumerate(self.insts):
            # BUILD_MAP for < 3.5 can skip *forward* in instructions and
            # replace them. So we use the below to get up to the position
            # scanned and replaced forward
            if skip_end_offset and inst.offset <= skip_end_offset:
                continue
            skip_end_offset = None

            opname = inst.opname
            argval = inst.argval
            pattr = inst.argrepr

            # The token for this instruction is created up front; its kind,
            # attr and pattr are overwritten at the bottom of the loop with
            # whatever the classification below decides.
            t = Token(
                opname=opname,
                attr=argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                op=inst.opcode,
                has_arg=inst.has_arg,
                has_extended_arg=inst.has_extended_arg,
                opc=self.opc,
            )

            # things that smash new_tokens like BUILD_LIST have to come first.
            if opname in (
                "BUILD_CONST_KEY_MAP",
                "BUILD_LIST",
                "BUILD_SET",
            ):
                collection_type = (
                    "DICT"
                    if opname.startswith("BUILD_CONST_KEY_MAP")
                    else opname.split("_")[1]
                )
                try_tokens = self.bound_collection_from_inst(
                    self.insts, new_tokens, inst, t, i, f"CONST_{collection_type}"
                )
                if try_tokens is not None:
                    # The helper returned a rewritten token list that already
                    # ends with the BUILD token; nothing more to do here.
                    new_tokens = try_tokens
                    continue

            elif opname in ("BUILD_MAP",):
                if self.version >= (3, 5):
                    try_tokens = self.bound_map_from_inst_35(
                        self.insts,
                        new_tokens,
                        t,
                        i,
                    )
                    if try_tokens is not None:
                        new_tokens = try_tokens
                        continue
                    pass
                pass
            pass

            argval = inst.argval
            op = inst.opcode

            if opname == "EXTENDED_ARG":
                # EXTEND_ARG adjustments to the operand value should have
                # already been accounted for in xdis instruction creation.
                continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = "COME_FROM"
                    come_from_opname = self.opname_for_offset(jump_offset)
                    if come_from_opname == "EXTENDED_ARG":
                        # NOTE(review): `op` is the opcode of the current
                        # instruction, not of the EXTENDED_ARG found at
                        # jump_offset -- confirm that is what next_offset()
                        # should be given here.
                        j = xdis.next_offset(op, self.opc, jump_offset)
                        come_from_opname = self.opname_for_offset(j)

                    if come_from_opname.startswith("SETUP_"):
                        # e.g. SETUP_LOOP gives COME_FROM_LOOP
                        come_from_type = come_from_opname[len("SETUP_") :]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    # The pseudo-token's offset is "<target offset>_<n>" so
                    # that several COME_FROMs at one target stay distinct.
                    new_tokens.append(
                        Token(
                            come_from_name,
                            jump_offset,
                            repr(jump_offset),
                            offset="%s_%s" % (inst.offset, jump_idx),
                            has_arg=True,
                            opc=self.opc,
                        )
                    )
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                new_tokens.append(
                    Token(
                        "ELSE",
                        None,
                        repr(end_offset),
                        offset="%s" % (inst.offset),
                        has_arg=True,
                        opc=self.opc,
                    )
                )

                pass

            # From here on, one if/elif chain refines opname (and sometimes
            # argval/pattr) for this instruction.
            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    # Code-object constants are classified by the name of
                    # the code object.
                    co_name = get_code_name(const)
                    if co_name == "<lambda>":
                        assert opname == "LOAD_CONST"
                        opname = "LOAD_LAMBDA"
                    elif co_name == "<genexpr>":
                        opname = "LOAD_GENEXPR"
                    elif co_name == "<dictcomp>":
                        opname = "LOAD_DICTCOMP"
                    elif co_name == "<setcomp>":
                        opname = "LOAD_SETCOMP"
                    elif co_name == "<listcomp>":
                        opname = "LOAD_LISTCOMP"
                    else:
                        opname = "LOAD_CODE"
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, co_name)
                    pattr = "<code_object " + co_name + ">"
                elif isinstance(const, str):
                    opname = "LOAD_STR"
                else:
                    if isinstance(inst.arg, int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname == "LOAD_FAST" and argval == ".0":
                # Used as the parameter of a list expression
                opname = "LOAD_ARG"
            elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"):
                if self.version >= (3, 6):
                    # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION'
                    flags = argval
                    # FIXME: generalize this
                    if flags == 8:
                        opname = "MAKE_FUNCTION_CLOSURE"
                    elif flags == 9:
                        opname = "MAKE_FUNCTION_CLOSURE_POS"
                    else:
                        opname = f"MAKE_FUNCTION_{flags}"
                    # Unpack the low bits of the flag mask, one entry per
                    # name in self.MAKE_FUNCTION_FLAGS (bit 0 first).
                    attr = []
                    for flag in self.MAKE_FUNCTION_FLAGS:
                        bit = flags & 1
                        attr.append(bit)
                        flags >>= 1
                    attr = attr[:4]  # remove last value: attr[5] == False
                else:
                    pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35(
                        inst.argval
                    )

                    pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated"

                    if name_pair_args > 0 and annotate_args > 0:
                        # FIXME: this should probably be K_
                        opname += f"_N{name_pair_args}_A{annotate_args}"
                        pass
                    elif annotate_args > 0:
                        opname += f"_A_{annotate_args}"
                        pass
                    elif name_pair_args > 0:
                        opname += f"_N_{name_pair_args}"
                        pass
                    else:
                        # Rule customization mathics, MAKE_FUNCTION_...
                        # so make sure to add the "_"
                        opname += "_0"

                    attr = (pos_args, name_pair_args, annotate_args)

                # MAKE_FUNCTION/MAKE_CLOSURE get a fresh Token (the `t`
                # built above is not used) and skip the rest of the loop
                # body, including the last_op_was_break update.
                new_tokens.append(
                    Token(
                        opname=opname,
                        attr=attr,
                        pattr=pattr,
                        offset=inst.offset,
                        linestart=inst.starts_line,
                        op=op,
                        has_arg=inst.has_arg,
                        opc=self.opc,
                    )
                )
                continue
            elif op in self.varargs_ops:
                # Append the operand count to the name, e.g. BUILD_LIST_3.
                pos_args = argval
                if self.is_pypy and not pos_args and opname == "BUILD_MAP":
                    opname = "BUILD_MAP_n"
                else:
                    opname = "%s_%d" % (opname, pos_args)

            elif self.is_pypy and opname in ("JUMP_IF_NOT_DEBUG", "CALL_FUNCTION"):
                if opname == "JUMP_IF_NOT_DEBUG":
                    # The value in the dict is in special cases in semantic actions, such
                    # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                    # in arbitrary value 0.
                    customize[opname] = 0
                elif self.version >= (3, 6) and argval > 255:
                    opname = "CALL_FUNCTION_KW"
                    pass

            elif opname == "UNPACK_EX":
                # FIXME: try with scanner and parser by
                # changing argval
                # Low byte: count before the starred target; next byte:
                # count after it.
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xFF
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = "%s_%d+%d" % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    # NOTE(review): indexes insts[i + 1] without a bounds
                    # check, so this relies on a backward JUMP_ABSOLUTE
                    # never being the last instruction -- confirm.
                    next_opname = self.insts[i + 1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (
                        self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
                        and self.insts[i + 1].opname == "JUMP_FORWARD"
                    )

                    if (
                        self.version[:2] == (3, 0)
                        and self.insts[i + 1].opname == "JUMP_FORWARD"
                        and not is_continue
                    ):
                        target_prev = self.offset2inst_index[self.prev_op[target]]
                        is_continue = self.insts[target_prev].opname == "SETUP_LOOP"

                    if is_continue or (
                        inst.offset in self.stmts
                        and (
                            inst.starts_line
                            and next_opname not in self.not_continue_follow
                        )
                    ):
                        opname = "CONTINUE"
                    else:
                        opname = "JUMP_BACK"
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        # NOTE(review): new_tokens[-1] / new_tokens[-2]
                        # assume earlier tokens were already emitted --
                        # confirm.
                        if (
                            new_tokens[-1].kind == "JUMP_BACK"
                            and new_tokens[-1].attr <= argval
                        ):
                            if new_tokens[-2].kind == "BREAK_LOOP":
                                del new_tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token
                                new_tokens[-1].kind = intern("CONTINUE")
                    # A CONTINUE directly after a BREAK_LOOP is dropped.
                    if last_op_was_break and opname == "CONTINUE":
                        last_op_was_break = False
                        continue

            # FIXME: go over for Python 3.6+. This is sometimes wrong
            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = "RETURN_END_IF"

            elif inst.offset in self.load_asserts:
                # Offset recorded by the assertion pre-scan above.
                opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            # Commit the (possibly rewritten) name and operands onto the
            # token built at the top of the loop.
            t.kind = opname
            t.attr = argval
            t.pattr = pattr

            new_tokens.append(t)
            pass

        if show_asm in ("both", "after"):
            print("\n# ---- tokenization:")
            # FIXME: t.format() is changing tokens!
            for t in new_tokens.copy():
                print(t.format(line_prefix=""))
            print()
        return new_tokens, customize
  735. def find_jump_targets(self, debug):
  736. """
  737. Detect all offsets in a byte code which are jump targets
  738. where we might insert a COME_FROM instruction.
  739. Return the list of offsets.
  740. Return the list of offsets. An instruction can be jumped
  741. to in from multiple instructions.
  742. """
  743. code = self.code
  744. n = len(code)
  745. self.structs = [{"type": "root", "start": 0, "end": n - 1}]
  746. # All loop entry points
  747. self.loops = []
  748. # Map fixed jumps to their real destination
  749. self.fixed_jumps = {}
  750. self.except_targets = {}
  751. self.ignore_if = set()
  752. self.build_statement_indices()
  753. self.else_start = {}
  754. # Containers filled by detect_control_flow()
  755. self.not_continue = set()
  756. self.return_end_ifs = set()
  757. self.setup_loop_targets = {} # target given setup_loop offset
  758. self.setup_loops = {} # setup_loop offset given target
  759. targets = {}
  760. for i, inst in enumerate(self.insts):
  761. offset = inst.offset
  762. op = inst.opcode
  763. # Determine structures and fix jumps in Python versions
  764. # since 2.3
  765. self.detect_control_flow(offset, targets, i)
  766. if inst.has_arg:
  767. label = self.fixed_jumps.get(offset)
  768. oparg = inst.arg
  769. if (
  770. self.version >= (3, 6)
  771. and self.code[offset] == self.opc.EXTENDED_ARG
  772. ):
  773. j = xdis.next_offset(op, self.opc, offset)
  774. next_offset = xdis.next_offset(op, self.opc, j)
  775. else:
  776. next_offset = xdis.next_offset(op, self.opc, offset)
  777. if label is None:
  778. if op in self.opc.hasjrel and op != self.opc.FOR_ITER:
  779. label = next_offset + oparg
  780. elif op in self.opc.hasjabs:
  781. if op in self.jump_if_pop:
  782. if oparg > offset:
  783. label = oparg
  784. if label is not None and label != -1:
  785. targets[label] = targets.get(label, []) + [offset]
  786. elif op == self.opc.END_FINALLY and offset in self.fixed_jumps:
  787. label = self.fixed_jumps[offset]
  788. targets[label] = targets.get(label, []) + [offset]
  789. pass
  790. pass # for loop
  791. # DEBUG:
  792. if debug in ("both", "after"):
  793. import pprint as pp
  794. pp.pprint(self.structs)
  795. return targets
  796. def build_statement_indices(self):
  797. code = self.code
  798. start = 0
  799. end = codelen = len(code)
  800. # Compose preliminary list of indices with statements,
  801. # using plain statement opcodes
  802. prelim = self.inst_matches(start, end, self.statement_opcodes)
  803. # Initialize final container with statements with
  804. # preliminary data
  805. stmts = self.stmts = set(prelim)
  806. # Same for opcode sequences
  807. pass_stmts = set()
  808. for sequence in self.statement_opcode_sequences:
  809. for i in self.op_range(start, end - (len(sequence) + 1)):
  810. match = True
  811. for elem in sequence:
  812. if elem != code[i]:
  813. match = False
  814. break
  815. i += instruction_size(code[i], self.opc)
  816. if match is True:
  817. i = self.prev_op[i]
  818. stmts.add(i)
  819. pass_stmts.add(i)
  820. # Initialize statement list with the full data we've gathered so far
  821. if pass_stmts:
  822. stmt_offset_list = list(stmts)
  823. stmt_offset_list.sort()
  824. else:
  825. stmt_offset_list = prelim
  826. # 'List-map' which contains offset of start of
  827. # next statement, when op offset is passed as index
  828. self.next_stmt = slist = []
  829. last_stmt_offset = -1
  830. i = 0
  831. # Go through all statement offsets
  832. for stmt_offset in stmt_offset_list:
  833. # Process absolute jumps, but do not remove 'pass' statements
  834. # from the set
  835. if (
  836. code[stmt_offset] == self.opc.JUMP_ABSOLUTE
  837. and stmt_offset not in pass_stmts
  838. ):
  839. # If absolute jump occurs in forward direction or it takes off from the
  840. # same line as previous statement, this is not a statement
  841. # FIXME: 0 isn't always correct
  842. target = self.get_target(stmt_offset)
  843. if (
  844. target > stmt_offset
  845. or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no
  846. ):
  847. stmts.remove(stmt_offset)
  848. continue
  849. # Rewing ops till we encounter non-JUMP_ABSOLUTE one
  850. j = self.prev_op[stmt_offset]
  851. while code[j] == self.opc.JUMP_ABSOLUTE:
  852. j = self.prev_op[j]
  853. # If we got here, then it's list comprehension which
  854. # is not a statement too
  855. if code[j] == self.opc.LIST_APPEND:
  856. stmts.remove(stmt_offset)
  857. continue
  858. # Exclude ROT_TWO + POP_TOP
  859. elif (
  860. code[stmt_offset] == self.opc.POP_TOP
  861. and code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO
  862. ):
  863. stmts.remove(stmt_offset)
  864. continue
  865. # Exclude FOR_ITER + designators
  866. elif code[stmt_offset] in self.designator_ops:
  867. j = self.prev_op[stmt_offset]
  868. while code[j] in self.designator_ops:
  869. j = self.prev_op[j]
  870. if code[j] == self.opc.FOR_ITER:
  871. stmts.remove(stmt_offset)
  872. continue
  873. # Add to list another list with offset of current statement,
  874. # equal to length of previous statement
  875. slist += [stmt_offset] * (stmt_offset - i)
  876. last_stmt_offset = stmt_offset
  877. i = stmt_offset
  878. # Finish filling the list for last statement
  879. slist += [codelen] * (codelen - len(slist))
    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect type of block structures and their boundaries to fix optimized jumps
        in python2.3+

        Called once per instruction. Depending on the opcode of the
        instruction at ``inst_index`` this records what it learns in the
        instance containers: it appends to ``self.structs`` and
        ``self.loops``, and writes to ``self.fixed_jumps``,
        ``self.setup_loops``, ``self.ignore_if``, ``self.not_continue``,
        ``self.return_end_ifs``, ``self.else_start`` and
        ``self.except_targets``.

        :param offset: bytecode offset of the instruction being examined.
        :param targets: dict of jump-target offset -> list of source offsets
                        collected so far by the caller; it is only read here.
        :param inst_index: index into ``self.insts`` of the instruction.
        :return: always None; every ``return`` below is a bare early exit.
        """

        code = self.code
        inst = self.insts[inst_index]
        op = inst.opcode

        # Detect parent structure
        parent = self.structs[0]
        start = parent["start"]
        end = parent["end"]

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                current_start >= start and current_end <= end
            ):
                start = current_start
                end = current_end
                parent = struct

        if self.version < (3, 8) and op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += inst.inst_size
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            # line_no is unpacked but not used below.
            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(
                start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False
            )

            if jump_back:
                jump_forward_offset = xdis.next_offset(
                    code[jump_back], self.opc, jump_back
                )
            else:
                jump_forward_offset = None

            return_val_offset1 = self.prev[self.prev[end]]

            # Discard the candidate jump_back when it is not the last
            # instruction before "end", is followed by a forward jump, and
            # the loop ends in RETURN_VALUE (optionally followed by
            # POP_BLOCK).
            if (
                jump_back
                and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)
            ):
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                    code[self.prev_op[end]] == self.opc.POP_BLOCK
                    and code[return_val_offset1] == self.opc.RETURN_VALUE
                ):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                if_offset = None
                if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                end = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(
                        start, end, self.opc.JUMP_ABSOLUTE, start, False
                    )

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jb_next_offset) == self.get_target(end):
                            self.fixed_jumps[offset] = jb_next_offset
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jb_next_offset
                    end = jb_next_offset

                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        # NOTE(review): the "+ 3" looks like a pre-3.6
                        # three-byte instruction width -- confirm.
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            # Record the loop itself, and an "-else" struct when code
            # remains between the loop's jump back and "end".
            self.loops.append(target)
            self.structs.append(
                {"type": loop_type + "-loop", "start": target, "end": jump_back}
            )
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            if after_jump_offset != end:
                self.structs.append(
                    {
                        "type": loop_type + "-else",
                        "start": after_jump_offset,
                        "end": end,
                    }
                )
        elif op in self.pop_jump_tf:
            start = offset + inst.inst_size
            target = inst.argval
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent["type"] == "and/or":
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself? If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.
            pretarget = self.get_inst(prev_op[target])

            if (
                pretarget.opcode in self.pop_jump_if_pop
                and (target > offset)
                and pretarget.offset != offset
            ):
                # FIXME: hack upon hack...
                # In some cases the pretarget can be a jump to the next instruction
                # and these aren't and/or's either. We limit to 3.5+ since we experienced there
                # but it might be earlier versions, or might be a general principle.
                if self.version < (3, 5) or pretarget.argval != target:
                    # FIXME: this is not accurate The commented out below
                    # is what it should be. However grammar rules right now
                    # assume the incorrect offsets.
                    # self.fixed_jumps[offset] = target
                    self.fixed_jumps[offset] = pretarget.offset
                    self.structs.append(
                        {"type": "and/or", "start": start, "end": pretarget.offset}
                    )
                    return

            # The opcode *two* instructions before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == self.opc.POP_JUMP_IF_FALSE:
                # Search for another POP_JUMP_IF_FALSE targeting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(
                    start, self.next_stmt[offset], self.opc.POP_JUMP_IF_FALSE, target
                )

                # FIXME: Remove this whole "if" block
                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (
                        is_jump_forward
                        and pre_rtarget not in self.stmts
                        and self.restrict_to_parent(
                            self.get_target(pre_rtarget), parent
                        )
                        == rtarget
                    ):
                        # The first two branches below deliberately do
                        # nothing: they fall through to the struct detection
                        # further down instead of fixing the jump here.
                        if (
                            code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                            and self.remove_mid_line_ifs([offset])
                            and target == self.get_target(prev_op[pre_rtarget])
                            and (
                                prev_op[pre_rtarget] not in self.stmts
                                or self.get_target(prev_op[pre_rtarget])
                                > prev_op[pre_rtarget]
                            )
                            and 1
                            == len(
                                self.remove_mid_line_ifs(
                                    self.rem_or(
                                        start,
                                        prev_op[pre_rtarget],
                                        self.pop_jump_tf,
                                        target,
                                    )
                                )
                            )
                        ):
                            pass
                        elif (
                            code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                            and self.remove_mid_line_ifs([offset])
                            and 1
                            == (
                                len(
                                    set(
                                        self.remove_mid_line_ifs(
                                            self.rem_or(
                                                start,
                                                prev_op[pre_rtarget],
                                                self.pop_jump_tf,
                                                target,
                                            )
                                        )
                                    )
                                    | set(
                                        self.remove_mid_line_ifs(
                                            self.rem_or(
                                                start,
                                                prev_op[pre_rtarget],
                                                (
                                                    self.opc.POP_JUMP_IF_FALSE,
                                                    self.opc.POP_JUMP_IF_TRUE,
                                                    self.opc.JUMP_ABSOLUTE,
                                                ),
                                                pre_rtarget,
                                                True,
                                            )
                                        )
                                    )
                                )
                            )
                        ):
                            pass
                        elif self.version <= (3, 2):
                            fix = None
                            jump_ifs = self.inst_matches(
                                start,
                                self.next_stmt[offset],
                                self.opc.POP_JUMP_IF_FALSE,
                            )
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    # FIXME: remove magic number
                                    if self.lines[j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            # Fall back to the last matching jump when no
                            # better candidate was found.
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        if self.version < (3, 6):
                            # FIXME: this is putting in COME_FROMs in the wrong place.
                            # Fix up grammar so we don't need to do this.
                            # See cf_for_iter use in parser36.py
                            self.fixed_jumps[offset] = match[-1]
                        elif target > offset:
                            # Right now we only add COME_FROMs in forward (not loop) jumps
                            self.fixed_jumps[offset] = target
                        return
            # op == POP_JUMP_IF_TRUE
            else:
                # NOTE: this local shadows the builtin next() for the rest
                # of this branch.
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(next):
                    if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                        if (
                            code[next] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]]
                            not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
                        ):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (
                    code[next] == self.opc.JUMP_ABSOLUTE
                    and self.is_jump_forward(target)
                    and self.get_target(target) == self.get_target(next)
                ):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
            # NOTE(review): "rtarget + 3" below looks like a pre-3.6
            # three-byte instruction width -- confirm for 3.6+ wordcode.
            if (
                rtarget_is_ja
                and pre_rtarget in self.stmts
                and pre_rtarget != offset
                and prev_op[pre_rtarget] != offset
                and not (
                    code[rtarget] == self.opc.JUMP_ABSOLUTE
                    and code[rtarget + 3] == self.opc.POP_BLOCK
                    and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE
                )
            ):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  POP_JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # For 3.5, in addition the JUMP_FORWARD above we could have
            # JUMP_BACK or CONTINUE
            #
            # There are other situations we may need to consider, like
            # if the condition jump is to a forward location.
            # Also the existence of a jump to the instruction after "END_FINALLY"
            # will distinguish "try/else" from "try".
            if self.version < (3, 8):
                rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)
            else:
                rtarget_break = (self.opc.RETURN_VALUE,)

            if self.is_jump_forward(pre_rtarget) or (
                rtarget_is_ja and self.version >= (3, 5)
            ):
                if_end = self.get_target(pre_rtarget)

                # If the jump target is back, we are looping
                if (
                    if_end < pre_rtarget
                    and self.version < (3, 8)
                    and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)
                ):
                    if if_end > start:
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append(
                    {"type": "if-then", "start": start, "end": pre_rtarget}
                )

                # FIXME: add this
                # self.fixed_jumps[offset] = rtarget

                self.not_continue.add(pre_rtarget)

                # Record an "else" struct when code remains between the
                # jump target and the end of the "if".
                if rtarget < end and (
                    code[rtarget] not in (self.opc.END_FINALLY, self.opc.JUMP_ABSOLUTE)
                    and code[prev_op[pre_rtarget]]
                    not in (self.opc.POP_EXCEPT, self.opc.END_FINALLY)
                ):
                    self.structs.append({"type": "else", "start": rtarget, "end": end})
                    self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                # if_end is assigned here but not read again in this branch.
                if_end = rtarget
                self.structs.append(
                    {"type": "if-then", "start": start, "end": pre_rtarget}
                )
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in rtarget_break:
                self.structs.append({"type": "if-then", "start": start, "end": rtarget})
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
                        return
                if self.version >= (3, 5):
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if self.version < (3, 6) and code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if (
                                self.version < (3, 8)
                                and come_from_op == self.opc.SETUP_EXCEPT
                            ):
                                return
                            pass
                    pass

                if self.version >= (3, 4):
                    self.fixed_jumps[offset] = rtarget

                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    # If we are at some sort of POP_JUMP_IF and the instruction before was
                    # COMPARE_OP exception-match, then pre_rtarget is not an end_if
                    if not (
                        inst_index > 0
                        and self.insts[inst_index - 1].argval == "exception-match"
                    ):
                        self.return_end_ifs.add(pre_rtarget)
                else:
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)
            else:
                # FIXME: this is very convoluted and based on rather hacky
                # empirical evidence. It should go away when
                # we have better control-flow analysis
                normal_jump = self.version >= (3, 6)
                if self.version[:2] == (3, 5):
                    j = self.offset2inst_index[target]
                    if j + 2 < len(self.insts) and self.insts[j + 2].is_jump_target:
                        normal_jump = self.insts[j + 1].opname == "POP_BLOCK"

                if normal_jump:
                    # For now, we'll only tag forward jump.
                    if target > offset:
                        self.fixed_jumps[offset] = target
                        pass
                else:
                    # FIXME: This is probably a bug in < 3.5 and we should
                    # instead use the above code. But until we smoke things
                    # out we'll stick with it.
                    if rtarget > offset:
                        self.fixed_jumps[offset] = rtarget

        elif self.version < (3, 8) and op == self.opc.SETUP_EXCEPT:
            # Clamp the handler target to the enclosing structure.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.POP_EXCEPT:
            # Look at the instruction right after POP_EXCEPT: a forward
            # JUMP_ABSOLUTE that is not itself followed by END_FINALLY is
            # recorded as a fixed jump and as an except target.
            next_offset = xdis.next_offset(op, self.opc, offset)
            target = self.get_target(next_offset)
            if target > next_offset:
                next_op = code[next_offset]
                if (
                    self.opc.JUMP_ABSOLUTE == next_op
                    and self.opc.END_FINALLY
                    != code[xdis.next_offset(next_op, self.opc, next_offset)]
                ):
                    self.fixed_jumps[next_offset] = target
                    self.except_targets[target] = next_offset

        elif op == self.opc.SETUP_FINALLY:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                unop_target = self.last_instr(
                    offset, target, self.opc.JUMP_FORWARD, target
                )
                # NOTE(review): "unop_target + 3" looks like a pre-3.6
                # three-byte instruction width -- confirm.
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
                    pass
                pass
        elif self.version >= (3, 5):
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                next_offset = xdis.next_offset(op, self.opc, offset)
                if next_offset < len(code) and (
                    code[next_offset] == self.opc.JUMP_ABSOLUTE
                    and offset in self.return_end_ifs
                ):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (
                    code[rtarget_prev] == self.opc.RETURN_VALUE
                    and rtarget_prev in self.return_end_ifs
                ):
                    # Walk backwards from the RETURN_VALUE to this jump;
                    # give up if any other jump lies in between.
                    # NOTE(review): this compares against the module-level
                    # op3 opcodes rather than self.opc -- confirm intended.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
  1353. def is_jump_back(self, offset, extended_arg):
  1354. """
  1355. Return True if the code at offset is some sort of jump back.
  1356. That is, it is ether "JUMP_FORWARD" or an absolute jump that
  1357. goes forward.
  1358. """
  1359. if self.code[offset] != self.opc.JUMP_ABSOLUTE:
  1360. return False
  1361. return offset > self.get_target(offset, extended_arg)
  1362. def next_except_jump(self, start):
  1363. """
  1364. Return the next jump that was generated by an except SomeException:
  1365. construct in a try...except...else clause or None if not found.
  1366. """
  1367. if self.code[start] == self.opc.DUP_TOP:
  1368. except_match = self.first_instr(
  1369. start, len(self.code), self.opc.POP_JUMP_IF_FALSE
  1370. )
  1371. if except_match:
  1372. jmp = self.prev_op[self.get_target(except_match)]
  1373. self.ignore_if.add(except_match)
  1374. self.not_continue.add(jmp)
  1375. return jmp
  1376. count_END_FINALLY = 0
  1377. count_SETUP_ = 0
  1378. for i in self.op_range(start, len(self.code)):
  1379. op = self.code[i]
  1380. if op == self.opc.END_FINALLY:
  1381. if count_END_FINALLY == count_SETUP_:
  1382. assert self.code[self.prev_op[i]] in frozenset(
  1383. [
  1384. self.opc.JUMP_ABSOLUTE,
  1385. self.opc.JUMP_FORWARD,
  1386. self.opc.RETURN_VALUE,
  1387. ]
  1388. )
  1389. self.not_continue.add(self.prev_op[i])
  1390. return self.prev_op[i]
  1391. count_END_FINALLY += 1
  1392. elif op in self.setup_opts_no_loop:
  1393. count_SETUP_ += 1
  1394. def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
  1395. """
  1396. Find offsets of all requested <instr> between <start> and <end>,
  1397. optionally <target>ing specified offset, and return list found
  1398. <instr> offsets which are not within any POP_JUMP_IF_TRUE jumps.
  1399. """
  1400. assert start >= 0 and end <= len(self.code) and start <= end
  1401. # Find all offsets of requested instructions
  1402. instr_offsets = self.inst_matches(
  1403. start, end, instr, target, include_beyond_target
  1404. )
  1405. # Get all POP_JUMP_IF_TRUE (or) offsets
  1406. if self.version[:2] == (3, 0):
  1407. jump_true_op = self.opc.JUMP_IF_TRUE
  1408. else:
  1409. jump_true_op = self.opc.POP_JUMP_IF_TRUE
  1410. pjit_offsets = self.inst_matches(start, end, jump_true_op)
  1411. filtered = []
  1412. for pjit_offset in pjit_offsets:
  1413. pjit_tgt = self.get_target(pjit_offset) - 3
  1414. for instr_offset in instr_offsets:
  1415. if instr_offset <= pjit_offset or instr_offset >= pjit_tgt:
  1416. filtered.append(instr_offset)
  1417. instr_offsets = filtered
  1418. filtered = []
  1419. return instr_offsets
  1420. if __name__ == "__main__":
  1421. from xdis.version_info import PYTHON_VERSION_TRIPLE
  1422. if PYTHON_VERSION_TRIPLE >= (3, 2):
  1423. import inspect
  1424. co = inspect.currentframe().f_code
  1425. tokens, customize = Scanner3(PYTHON_VERSION_TRIPLE).ingest(co)
  1426. for t in tokens:
  1427. print(t)
  1428. else:
  1429. print("Need to be Python 3.2 or greater to demo; I am %s." % sys.version)
  1430. pass