# scanner30.py
  1. # Copyright (c) 2016-2017, 2020-2021 by Rocky Bernstein
  2. """
  3. Python 3.0 bytecode scanner/deparser
  4. This sets up opcodes Python's 3.0 and calls a generalized
  5. scanner routine for Python 3.
  6. """
  7. from __future__ import print_function
  8. import xdis
  9. from xdis import instruction_size
  10. # bytecode verification, verify(), uses JUMP_OPs from here
  11. from xdis.opcodes import opcode_30 as opc
  12. from uncompyle6.scanners.scanner3 import Scanner3
  13. JUMP_TF = frozenset([opc.JUMP_IF_FALSE, opc.JUMP_IF_TRUE])
  14. class Scanner30(Scanner3):
  15. def __init__(self, show_asm=None, is_pypy=False):
  16. Scanner3.__init__(self, (3, 0), show_asm, is_pypy)
  17. return
  18. pass
  19. def detect_control_flow(self, offset, targets, inst_index):
  20. """
  21. Detect structures and their boundaries to fix optimized jumps
  22. Python 3.0 is more like Python 2.6 than it is Python 3.x.
  23. So we have a special routine here.
  24. """
  25. code = self.code
  26. op = self.insts[inst_index].opcode
  27. # Detect parent structure
  28. parent = self.structs[0]
  29. start = parent["start"]
  30. end = parent["end"]
  31. # Pick innermost parent for our offset
  32. for struct in self.structs:
  33. current_start = struct["start"]
  34. current_end = struct["end"]
  35. if (current_start <= offset < current_end) and (
  36. current_start >= start and current_end <= end
  37. ):
  38. start = current_start
  39. end = current_end
  40. parent = struct
  41. if op == self.opc.SETUP_LOOP:
  42. # We categorize loop types: 'for', 'while', 'while 1' with
  43. # possibly suffixes '-loop' and '-else'
  44. # Try to find the jump_back instruction of the loop.
  45. # It could be a return instruction.
  46. start += instruction_size(op, self.opc)
  47. target = self.get_target(offset)
  48. end = self.restrict_to_parent(target, parent)
  49. self.setup_loops[target] = offset
  50. if target != end:
  51. self.fixed_jumps[offset] = end
  52. (line_no, next_line_byte) = self.lines[offset]
  53. jump_back = self.last_instr(
  54. start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False
  55. )
  56. if jump_back:
  57. jump_forward_offset = xdis.next_offset(
  58. code[jump_back], self.opc, jump_back
  59. )
  60. else:
  61. jump_forward_offset = None
  62. return_val_offset1 = self.prev[self.prev[end]]
  63. if (
  64. jump_back
  65. and jump_back != self.prev_op[end]
  66. and self.is_jump_forward(jump_forward_offset)
  67. ):
  68. if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
  69. code[self.prev_op[end]] == self.opc.POP_BLOCK
  70. and code[return_val_offset1] == self.opc.RETURN_VALUE
  71. ):
  72. jump_back = None
  73. if not jump_back:
  74. # loop suite ends in return
  75. jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
  76. if not jump_back:
  77. return
  78. jb_inst = self.get_inst(jump_back)
  79. jump_back = self.next_offset(jb_inst.opcode, jump_back)
  80. if_offset = None
  81. if code[self.prev_op[next_line_byte]] not in JUMP_TF:
  82. if_offset = self.prev[next_line_byte]
  83. if if_offset:
  84. loop_type = "while"
  85. self.ignore_if.add(if_offset)
  86. else:
  87. loop_type = "for"
  88. target = next_line_byte
  89. end = jump_back + 3
  90. else:
  91. if self.get_target(jump_back) >= next_line_byte:
  92. jump_back = self.last_instr(
  93. start, end, self.opc.JUMP_ABSOLUTE, start, False
  94. )
  95. jb_inst = self.get_inst(jump_back)
  96. jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
  97. if end > jb_next_offset and self.is_jump_forward(end):
  98. if self.is_jump_forward(jb_next_offset):
  99. if self.get_target(jump_back + 4) == self.get_target(end):
  100. self.fixed_jumps[offset] = jump_back + 4
  101. end = jb_next_offset
  102. elif target < offset:
  103. self.fixed_jumps[offset] = jump_back + 4
  104. end = jb_next_offset
  105. target = self.get_target(jump_back)
  106. if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
  107. loop_type = "for"
  108. else:
  109. loop_type = "while"
  110. test = self.prev_op[next_line_byte]
  111. if test == offset:
  112. loop_type = "while 1"
  113. elif self.code[test] in self.opc.JUMP_OPs:
  114. self.ignore_if.add(test)
  115. test_target = self.get_target(test)
  116. if test_target > (jump_back + 3):
  117. jump_back = test_target
  118. self.not_continue.add(jump_back)
  119. self.loops.append(target)
  120. self.structs.append(
  121. {"type": loop_type + "-loop", "start": target, "end": jump_back}
  122. )
  123. after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
  124. if self.get_inst(after_jump_offset).opname == "POP_TOP":
  125. after_jump_offset = xdis.next_offset(
  126. code[after_jump_offset], self.opc, after_jump_offset
  127. )
  128. if after_jump_offset != end:
  129. self.structs.append(
  130. {
  131. "type": loop_type + "-else",
  132. "start": after_jump_offset,
  133. "end": end,
  134. }
  135. )
  136. elif op in self.pop_jump_tf:
  137. start = offset + instruction_size(op, self.opc)
  138. target = self.get_target(offset)
  139. rtarget = self.restrict_to_parent(target, parent)
  140. prev_op = self.prev_op
  141. # Do not let jump to go out of parent struct bounds
  142. if target != rtarget and parent["type"] == "and/or":
  143. self.fixed_jumps[offset] = rtarget
  144. return
  145. # Does this jump to right after another conditional jump that is
  146. # not myself? If so, it's part of a larger conditional.
  147. # rocky: if we have a conditional jump to the next instruction, then
  148. # possibly I am "skipping over" a "pass" or null statement.
  149. if (
  150. (code[prev_op[target]] in self.pop_jump_if_pop)
  151. and (target > offset)
  152. and prev_op[target] != offset
  153. ):
  154. self.fixed_jumps[offset] = prev_op[target]
  155. self.structs.append(
  156. {"type": "and/or", "start": start, "end": prev_op[target]}
  157. )
  158. return
  159. # The op offset just before the target jump offset is important
  160. # in making a determination of what we have. Save that.
  161. pre_rtarget = prev_op[rtarget]
  162. # Is it an "and" inside an "if" or "while" block
  163. if op == opc.JUMP_IF_FALSE:
  164. # Search for another JUMP_IF_FALSE targeting the same op,
  165. # in current statement, starting from current offset, and filter
  166. # everything inside inner 'or' jumps and midline ifs
  167. match = self.rem_or(
  168. start, self.next_stmt[offset], opc.JUMP_IF_FALSE, target
  169. )
  170. # If we still have any offsets in set, start working on it
  171. if match:
  172. is_jump_forward = self.is_jump_forward(pre_rtarget)
  173. if (
  174. is_jump_forward
  175. and pre_rtarget not in self.stmts
  176. and self.restrict_to_parent(
  177. self.get_target(pre_rtarget), parent
  178. )
  179. == rtarget
  180. ):
  181. if (
  182. code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
  183. and self.remove_mid_line_ifs([offset])
  184. and target == self.get_target(prev_op[pre_rtarget])
  185. and (
  186. prev_op[pre_rtarget] not in self.stmts
  187. or self.get_target(prev_op[pre_rtarget])
  188. > prev_op[pre_rtarget]
  189. )
  190. and 1
  191. == len(
  192. self.remove_mid_line_ifs(
  193. self.rem_or(
  194. start, prev_op[pre_rtarget], JUMP_TF, target
  195. )
  196. )
  197. )
  198. ):
  199. pass
  200. elif (
  201. code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
  202. and self.remove_mid_line_ifs([offset])
  203. and 1
  204. == (
  205. len(
  206. set(
  207. self.remove_mid_line_ifs(
  208. self.rem_or(
  209. start,
  210. prev_op[pre_rtarget],
  211. JUMP_TF,
  212. target,
  213. )
  214. )
  215. )
  216. | set(
  217. self.remove_mid_line_ifs(
  218. self.rem_or(
  219. start,
  220. prev_op[pre_rtarget],
  221. (
  222. opc.JUMP_IF_FALSE,
  223. opc.JUMP_IF_TRUE,
  224. opc.JUMP_ABSOLUTE,
  225. ),
  226. pre_rtarget,
  227. True,
  228. )
  229. )
  230. )
  231. )
  232. )
  233. ):
  234. pass
  235. else:
  236. fix = None
  237. jump_ifs = self.inst_matches(
  238. start, self.next_stmt[offset], opc.JUMP_IF_FALSE
  239. )
  240. last_jump_good = True
  241. for j in jump_ifs:
  242. if target == self.get_target(j):
  243. # FIXME: remove magic number
  244. if self.lines[j].next == j + 3 and last_jump_good:
  245. fix = j
  246. break
  247. else:
  248. last_jump_good = False
  249. self.fixed_jumps[offset] = fix or match[-1]
  250. return
  251. else:
  252. self.fixed_jumps[offset] = match[-1]
  253. return
  254. # op == JUMP_IF_TRUE
  255. else:
  256. next = self.next_stmt[offset]
  257. if prev_op[next] == offset:
  258. pass
  259. elif self.is_jump_forward(next) and target == self.get_target(next):
  260. if code[prev_op[next]] == opc.JUMP_IF_FALSE:
  261. if (
  262. code[next] == self.opc.JUMP_FORWARD
  263. or target != rtarget
  264. or code[prev_op[pre_rtarget]]
  265. not in (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)
  266. ):
  267. self.fixed_jumps[offset] = prev_op[next]
  268. return
  269. elif (
  270. code[next] == self.opc.JUMP_ABSOLUTE
  271. and self.is_jump_forward(target)
  272. and self.get_target(target) == self.get_target(next)
  273. ):
  274. self.fixed_jumps[offset] = prev_op[next]
  275. return
  276. # Don't add a struct for a while test, it's already taken care of
  277. if offset in self.ignore_if:
  278. return
  279. if (
  280. code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
  281. and pre_rtarget in self.stmts
  282. and pre_rtarget != offset
  283. and prev_op[pre_rtarget] != offset
  284. and not (
  285. code[rtarget] == self.opc.JUMP_ABSOLUTE
  286. and code[rtarget + 3] == self.opc.POP_BLOCK
  287. and code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE
  288. )
  289. ):
  290. rtarget = pre_rtarget
  291. # Does the "jump if" jump beyond a jump op?
  292. # That is, we have something like:
  293. # JUMP_IF_FALSE HERE
  294. # ...
  295. # JUMP_FORWARD
  296. # HERE:
  297. #
  298. # If so, this can be block inside an "if" statement
  299. # or a conditional assignment like:
  300. # x = 1 if x else 2
  301. #
  302. # There are other contexts we may need to consider
  303. # like whether the target is "END_FINALLY"
  304. # or if the condition jump is to a forward location
  305. if self.is_jump_forward(pre_rtarget):
  306. if_end = self.get_target(pre_rtarget, 0)
  307. # If the jump target is back, we are looping
  308. if if_end < pre_rtarget and (
  309. code[prev_op[if_end]] == self.opc.SETUP_LOOP
  310. ):
  311. if if_end > start:
  312. return
  313. self.restrict_to_parent(if_end, parent)
  314. self.structs.append(
  315. {"type": "if-then", "start": start, "end": pre_rtarget}
  316. )
  317. self.not_continue.add(pre_rtarget)
  318. # if rtarget < end and (
  319. # code[rtarget] not in (self.opc.END_FINALLY,
  320. # self.opc.JUMP_ABSOLUTE) and
  321. # code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
  322. # self.opc.END_FINALLY)):
  323. # self.structs.append({'type': 'else',
  324. # 'start': rtarget,
  325. # 'end': end})
  326. # self.else_start[rtarget] = end
  327. elif self.is_jump_back(pre_rtarget, 0):
  328. self.structs.append(
  329. {"type": "if-then", "start": start, "end": pre_rtarget}
  330. )
  331. self.not_continue.add(pre_rtarget)
  332. elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP):
  333. self.structs.append({"type": "if-then", "start": start, "end": rtarget})
  334. # It is important to distinguish if this return is inside some sort
  335. # except block return
  336. jump_prev = prev_op[offset]
  337. if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
  338. if self.opc.cmp_op[code[jump_prev + 1]] == "exception-match":
  339. return
  340. if self.version >= (3, 5):
  341. # Python 3.5 may remove as dead code a JUMP
  342. # instruction after a RETURN_VALUE. So we check
  343. # based on seeing SETUP_EXCEPT various places.
  344. if code[rtarget] == self.opc.SETUP_EXCEPT:
  345. return
  346. # Check that next instruction after pops and jump is
  347. # not from SETUP_EXCEPT
  348. next_op = rtarget
  349. if code[next_op] == self.opc.POP_BLOCK:
  350. next_op += instruction_size(self.code[next_op], self.opc)
  351. if code[next_op] == self.opc.JUMP_ABSOLUTE:
  352. next_op += instruction_size(self.code[next_op], self.opc)
  353. if next_op in targets:
  354. for try_op in targets[next_op]:
  355. come_from_op = code[try_op]
  356. if come_from_op == self.opc.SETUP_EXCEPT:
  357. return
  358. pass
  359. pass
  360. if code[pre_rtarget] == self.opc.RETURN_VALUE:
  361. if self.version == (3, 0):
  362. next_op = rtarget
  363. if code[next_op] == self.opc.POP_TOP:
  364. next_op = rtarget
  365. for block in self.structs:
  366. if (
  367. block["type"] == "while-loop"
  368. and block["end"] == next_op
  369. ):
  370. return
  371. next_op += instruction_size(self.code[next_op], self.opc)
  372. if code[next_op] == self.opc.POP_BLOCK:
  373. return
  374. self.return_end_ifs.add(pre_rtarget)
  375. else:
  376. self.fixed_jumps[offset] = rtarget
  377. self.not_continue.add(pre_rtarget)
  378. elif op == self.opc.SETUP_EXCEPT:
  379. target = self.get_target(offset)
  380. end = self.restrict_to_parent(target, parent)
  381. self.fixed_jumps[offset] = end
  382. elif op == self.opc.SETUP_FINALLY:
  383. target = self.get_target(offset)
  384. end = self.restrict_to_parent(target, parent)
  385. self.fixed_jumps[offset] = end
  386. elif op in self.jump_if_pop:
  387. target = self.get_target(offset)
  388. if target > offset:
  389. unop_target = self.last_instr(
  390. offset, target, self.opc.JUMP_FORWARD, target
  391. )
  392. if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
  393. self.fixed_jumps[offset] = unop_target
  394. else:
  395. self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
  396. pass
  397. pass
  398. elif self.version >= (3, 5):
  399. # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
  400. # misclassified as RETURN_END_IF. Handle that here.
  401. # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
  402. if op == self.opc.RETURN_VALUE:
  403. if (
  404. offset + 1 < len(code)
  405. and code[offset + 1] == self.opc.JUMP_ABSOLUTE
  406. and offset in self.return_end_ifs
  407. ):
  408. self.return_end_ifs.remove(offset)
  409. pass
  410. pass
  411. elif op == self.opc.JUMP_FORWARD:
  412. # If we have:
  413. # JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
  414. # then RETURN_VALUE is not RETURN_END_IF
  415. rtarget = self.get_target(offset)
  416. rtarget_prev = self.prev[rtarget]
  417. if (
  418. code[rtarget_prev] == self.opc.RETURN_VALUE
  419. and rtarget_prev in self.return_end_ifs
  420. ):
  421. i = rtarget_prev
  422. while i != offset:
  423. if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
  424. return
  425. i = self.prev[i]
  426. self.return_end_ifs.remove(rtarget_prev)
  427. pass
  428. return
  429. if __name__ == "__main__":
  430. from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str
  431. if PYTHON_VERSION_TRIPLE[:2] == (3, 0):
  432. import inspect
  433. co = inspect.currentframe().f_code # type: ignore
  434. tokens, customize = Scanner30().ingest(co)
  435. for t in tokens:
  436. print(t.format())
  437. pass
  438. else:
  439. print("Need to be Python 3.0 to demo; I am version %s" % version_tuple_to_str())