extended.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. # (C) Copyright 2023-2025 by Rocky Bernstein
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. """
  17. Routines for formatting opcodes.
  18. """
  19. import re
  20. from typing import List, Optional, Tuple
  21. from xdis.instruction import Instruction
  22. from xdis.opcodes.format.basic import format_IS_OP, format_RAISE_VARARGS_older
  23. NULL_EXTENDED_OP = "", None
  24. def extended_format_binary_op(
  25. opc: Instruction, instructions: List[Instruction], fmt_str: str
  26. ) -> Tuple[str, Optional[int]]:
  27. """
  28. General routine for formatting binary operations.
  29. A binary operations pops a two arguments off of the evaluation stack and
  30. pushes a single value back on the evaluation stack. Also, the instruction
  31. must not raise an exception and must control must flow to the next instruction.
  32. instructions is a list of instructions
  33. fmt_str is a format string that indicates the two arguments.
  34. the return constins the string that should be added to tos_str and
  35. the position in instructions of the first instruction where that contributes
  36. to the binary operation, that is the logical beginning instruction.
  37. """
  38. i = skip_cache(instructions, 1)
  39. stack_inst1 = instructions[i]
  40. arg1 = None
  41. # If stack_inst1 is a jump target, then its predecessor stack_inst2
  42. # is possibly one of two values.
  43. if not stack_inst1.is_jump_target:
  44. if stack_inst1.tos_str is not None:
  45. arg1 = stack_inst1.tos_str
  46. if arg1 is not None or stack_inst1.opcode in opc.operator_set:
  47. if arg1 is None:
  48. arg1 = stack_inst1.argrepr
  49. arg1_start_offset = stack_inst1.start_offset
  50. if arg1_start_offset is not None:
  51. i = get_instruction_index_from_offset(
  52. arg1_start_offset, instructions, 1
  53. )
  54. if i is None:
  55. return NULL_EXTENDED_OP
  56. j = skip_cache(instructions, i + 1)
  57. stack_inst2 = instructions[j]
  58. if (
  59. stack_inst1.opcode in opc.operator_set
  60. and stack_inst2.opcode in opc.operator_set
  61. and not stack_inst2.is_jump_target
  62. ):
  63. arg2 = get_instruction_arg(stack_inst2, stack_inst2.argrepr)
  64. start_offset = stack_inst2.start_offset
  65. return fmt_str % (arg2, arg1), start_offset
  66. elif stack_inst2.start_offset is not None:
  67. start_offset = stack_inst2.start_offset
  68. arg2 = get_instruction_arg(stack_inst2, stack_inst2.argrepr)
  69. if arg2 == "":
  70. arg2 = "..."
  71. return fmt_str % (arg2, arg1), start_offset
  72. else:
  73. return fmt_str % ("...", arg1), None
  74. return NULL_EXTENDED_OP
  75. def extended_format_infix_binary_op(
  76. opc, instructions: list[Instruction], op_str: str
  77. ) -> Tuple[str, Optional[int]]:
  78. """ """
  79. i = 1
  80. # 3.11+ has CACHE instructions
  81. while instructions[i].opname == "CACHE":
  82. i += 1
  83. stack_arg1 = instructions[i]
  84. arg1 = None
  85. if stack_arg1.tos_str is not None:
  86. arg1 = stack_arg1.tos_str
  87. if arg1 is not None or stack_arg1.opcode in opc.operator_set:
  88. if arg1 is None:
  89. arg1 = instructions[1].argrepr
  90. else:
  91. arg1 = f"({arg1})"
  92. arg1_start_offset = instructions[1].start_offset
  93. if arg1_start_offset is not None:
  94. i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
  95. if i is None:
  96. return NULL_EXTENDED_OP
  97. j = i + 1
  98. # 3.11+ has CACHE instructions
  99. while instructions[j].opname == "CACHE":
  100. j += 1
  101. if (
  102. instructions[j].opcode in opc.operator_set
  103. and instructions[i].opcode in opc.operator_set
  104. ):
  105. arg2 = get_instruction_tos_str(instructions[j])
  106. start_offset = instructions[j].start_offset
  107. return f"{arg2}{op_str}{arg1}", start_offset
  108. elif instructions[j].start_offset is not None:
  109. start_offset = instructions[j].start_offset
  110. arg2 = (
  111. instructions[j].tos_str
  112. if instructions[j].tos_str is not None
  113. else instructions[j].argrepr
  114. )
  115. if arg2 == "":
  116. arg2 = "..."
  117. else:
  118. arg2 = f"({arg2})"
  119. return f"{arg2}{op_str}{arg1}", start_offset
  120. else:
  121. return f"...{op_str}{arg1}", None
  122. return NULL_EXTENDED_OP
  123. def extended_format_store_op(
  124. opc, instructions: List[Instruction]
  125. ) -> Tuple[str, Optional[int]]:
  126. inst = instructions[0]
  127. # If the store instruction is a jump target, then
  128. # the previous instruction is ambiguous. Here, things
  129. # are more complicated, so let's not try to figure this out.
  130. # This kind of things is best left for a decompiler.
  131. if inst.is_jump_target:
  132. return NULL_EXTENDED_OP
  133. prev_inst = instructions[1]
  134. start_offset = prev_inst.offset
  135. if prev_inst.opcode in opc.operator_set:
  136. if prev_inst.opcode in opc.nullaryloadop:
  137. argval = safe_repr(prev_inst.argval)
  138. elif (
  139. prev_inst.opcode in opc.VARGS_OPS | opc.NARGS_OPS
  140. and prev_inst.tos_str is None
  141. ):
  142. # In variable arguments lists and function-like calls
  143. # argval is a count. So we need a TOS representation
  144. # to do something here.
  145. return "", start_offset
  146. else:
  147. argval = prev_inst.argval
  148. argval = get_instruction_arg(prev_inst, argval)
  149. start_offset = prev_inst.start_offset
  150. if prev_inst.opname.startswith("INPLACE_"):
  151. # Inplace operators have their own assign routine.
  152. return argval, start_offset
  153. return f"{inst.argval} = {argval}", start_offset
  154. return "", start_offset
  155. def extended_format_ternary_op(
  156. opc, instructions: list[Instruction], fmt_str: str
  157. ) -> Tuple[str, Optional[int]]:
  158. """
  159. General routine for formatting ternary operations.
  160. A ternary operations pops a three arguments off of the evaluation stack and
  161. pushes a single value back on the evaluation stack. Also, the instruction
  162. must not raise an exception and must control must flow to the next instruction.
  163. instructions is a list of instructions
  164. fmt_str is a format string that indicates the two arguments.
  165. the return constins the string that should be added to tos_str and
  166. the position in instructions of the first instruction where that contributes
  167. to the binary operation, that is the logical beginning instruction.
  168. """
  169. i = skip_cache(instructions, 1)
  170. stack_inst1 = instructions[i]
  171. arg1 = None
  172. if stack_inst1.tos_str is not None:
  173. arg1 = stack_inst1.tos_str
  174. if arg1 is not None or stack_inst1.opcode in opc.operator_set:
  175. if arg1 is None:
  176. arg1 = stack_inst1.argrepr
  177. arg1_start_offset = stack_inst1.start_offset
  178. if arg1_start_offset is not None:
  179. i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
  180. if i is None:
  181. return NULL_EXTENDED_OP
  182. j = skip_cache(instructions, i + 1)
  183. stack_inst2 = instructions[j]
  184. if (
  185. stack_inst1.opcode in opc.operator_set
  186. and stack_inst2.opcode in opc.operator_set
  187. and not stack_inst2.is_jump_target
  188. ):
  189. arg2 = get_instruction_arg(stack_inst2, stack_inst2.argrepr)
  190. k = skip_cache(instructions, j + 1)
  191. stack_inst3 = instructions[k + 1]
  192. start_offset = stack_inst3.start_offset
  193. if (
  194. stack_inst3.opcode in opc.operator_set
  195. and not stack_inst3.is_jump_target
  196. ):
  197. arg3 = get_instruction_arg(stack_inst3, stack_inst3.argrepr)
  198. return fmt_str % (arg2, arg1, arg3), start_offset
  199. else:
  200. arg3 = "..."
  201. return fmt_str % (arg2, arg1, arg3), start_offset
  202. elif stack_inst2.start_offset is not None and not stack_inst2.is_jump_target:
  203. start_offset = stack_inst2.start_offset
  204. arg2 = get_instruction_arg(stack_inst2, stack_inst2.argrepr)
  205. if arg2 == "":
  206. arg2 = "..."
  207. arg3 = "..."
  208. return fmt_str % (arg2, arg1, arg3), start_offset
  209. else:
  210. return fmt_str % ("...", "...", "..."), None
  211. return NULL_EXTENDED_OP
  212. def extended_format_STORE_SUBSCR(
  213. opc, instructions: List[Instruction]
  214. ) -> Tuple[str, Optional[int]]:
  215. return extended_format_ternary_op(
  216. opc,
  217. instructions,
  218. "%s[%s] = %s",
  219. )
  220. def extended_format_unary_op(
  221. opc, instructions: list[Instruction], fmt_str: str
  222. ) -> Tuple[str, Optional[int]]:
  223. stack_arg = instructions[1]
  224. start_offset = instructions[1].start_offset
  225. if stack_arg.tos_str is not None and not stack_arg.is_jump_target:
  226. return fmt_str % stack_arg.tos_str, start_offset
  227. if stack_arg.opcode in opc.operator_set:
  228. return fmt_str % stack_arg.argrepr, start_offset
  229. return NULL_EXTENDED_OP
  230. def extended_format_ATTR(
  231. opc, instructions: List[Instruction]
  232. ) -> Tuple[str, Optional[int]]:
  233. """
  234. Handles both LOAD_ATTR and STORE_ATTR
  235. """
  236. instr1 = instructions[1]
  237. if (
  238. instr1.tos_str
  239. or instr1.opcode in opc.NAME_OPS | opc.CONST_OPS | opc.LOCAL_OPS | opc.FREE_OPS
  240. ):
  241. base = get_instruction_tos_str(instr1)
  242. return (
  243. f"{base}.{instructions[0].argrepr}",
  244. instr1.start_offset,
  245. )
  246. return NULL_EXTENDED_OP
  247. def extended_format_BINARY_ADD(
  248. opc, instructions: List[Instruction]
  249. ) -> Tuple[str, Optional[int]]:
  250. return extended_format_infix_binary_op(opc, instructions, " + ")
  251. def extended_format_BINARY_AND(
  252. opc, instructions: List[Instruction]
  253. ) -> Tuple[str, Optional[int]]:
  254. return extended_format_infix_binary_op(opc, instructions, " & ")
  255. def extended_format_BINARY_FLOOR_DIVIDE(
  256. opc, instructions: List[Instruction]
  257. ) -> Tuple[str, Optional[int]]:
  258. return extended_format_infix_binary_op(opc, instructions, " // ")
  259. def extended_format_BINARY_LSHIFT(
  260. opc, instructions: List[Instruction]
  261. ) -> Tuple[str, Optional[int]]:
  262. return extended_format_infix_binary_op(opc, instructions, " << ")
  263. def extended_format_BINARY_MODULO(
  264. opc, instructions: List[Instruction]
  265. ) -> Tuple[str, Optional[int]]:
  266. return extended_format_infix_binary_op(opc, instructions, " % ")
  267. def extended_format_BINARY_MULTIPLY(
  268. opc, instructions: List[Instruction]
  269. ) -> Tuple[str, Optional[int]]:
  270. return extended_format_infix_binary_op(opc, instructions, " * ")
  271. def extended_format_BINARY_OR(
  272. opc, instructions: List[Instruction]
  273. ) -> Tuple[str, Optional[int]]:
  274. return extended_format_infix_binary_op(opc, instructions, " | ")
  275. def extended_format_BINARY_POWER(
  276. opc, instructions: List[Instruction]
  277. ) -> Tuple[str, Optional[int]]:
  278. return extended_format_infix_binary_op(opc, instructions, " ** ")
  279. def extended_format_BINARY_RSHIFT(
  280. opc, instructions: List[Instruction]
  281. ) -> Tuple[str, Optional[int]]:
  282. return extended_format_infix_binary_op(opc, instructions, " >> ")
  283. def extended_format_BINARY_SUBSCR(
  284. opc, instructions: List[Instruction]
  285. ) -> Tuple[str, Optional[int]]:
  286. return extended_format_binary_op(
  287. opc,
  288. instructions,
  289. "%s[%s]",
  290. )
  291. def extended_format_BINARY_SUBTRACT(
  292. opc, instructions: List[Instruction]
  293. ) -> Tuple[str, Optional[int]]:
  294. return extended_format_infix_binary_op(opc, instructions, " - ")
  295. def extended_format_BINARY_TRUE_DIVIDE(
  296. opc, instructions: List[Instruction]
  297. ) -> Tuple[str, Optional[int]]:
  298. return extended_format_infix_binary_op(opc, instructions, " / ")
  299. def extended_format_BINARY_XOR(
  300. opc, instructions: List[Instruction]
  301. ) -> Tuple[str, Optional[int]]:
  302. return extended_format_infix_binary_op(opc, instructions, " ^ ")
  303. def extended_format_build_tuple_or_list(
  304. opc, instructions: List[Instruction], left_delim: str, right_delim: str
  305. ) -> Tuple[str, Optional[int]]:
  306. arg_count = instructions[0].argval
  307. is_tuple = left_delim == "("
  308. if arg_count == 0:
  309. # Note: caller generally handles this when the below isn't right.
  310. return f"{left_delim}{right_delim}", instructions[0].offset
  311. arglist, _, i = get_arglist(instructions, 0, arg_count)
  312. if arglist is not None:
  313. assert isinstance(i, int)
  314. args_str = ", ".join(reversed(arglist))
  315. if arg_count == 1 and is_tuple:
  316. return f"{left_delim}{args_str},{right_delim}", instructions[i].start_offset
  317. else:
  318. return f"{left_delim}{args_str}{right_delim}", instructions[i].start_offset
  319. return NULL_EXTENDED_OP
  320. def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
  321. arg_count = instructions[0].argval
  322. if arg_count == 0:
  323. # Note: caller generally handles this when the below isn't right.
  324. return "{}", instructions[0].offset
  325. assert len(instructions) > 0
  326. key_tuple = instructions[1]
  327. key_values = key_tuple.argval
  328. if key_tuple.opname == "LOAD_CONST" and isinstance(key_values, tuple):
  329. arglist, _, i = get_arglist(instructions, 1, arg_count)
  330. if arglist is not None:
  331. assert isinstance(i, int)
  332. assert len(arglist) == len(key_values)
  333. arg_pairs = []
  334. for i in range(len(arglist)):
  335. arg_pairs.append(f"{key_values[i]}: {arglist[i]}")
  336. args_str = ", ".join(arg_pairs)
  337. return "{" + args_str + "}", instructions[i].start_offset
  338. return NULL_EXTENDED_OP
  339. def extended_format_BUILD_LIST(
  340. opc, instructions: List[Instruction]
  341. ) -> Tuple[str, Optional[int]]:
  342. return extended_format_build_tuple_or_list(opc, instructions, "[", "]")
  343. def extended_format_BUILD_SET(
  344. opc, instructions: List[Instruction]
  345. ) -> Tuple[str, Optional[int]]:
  346. if instructions[0].argval == 0:
  347. # Degenerate case
  348. return "set()", instructions[0].start_offset
  349. return extended_format_build_tuple_or_list(opc, instructions, "{", "}")
  350. def extended_format_BUILD_SLICE(
  351. opc, instructions: List[Instruction]
  352. ) -> Tuple[str, Optional[int]]:
  353. argc = instructions[0].argval
  354. assert argc in (2, 3)
  355. arglist, arg_count, i = get_arglist(instructions, 0, argc)
  356. if arg_count == 0:
  357. assert isinstance(i, int)
  358. arglist = ["" if arg == "None" else arg for arg in arglist]
  359. return ":".join(reversed(arglist)), instructions[i].start_offset
  360. if instructions[0].argval == 0:
  361. # Degenerate case
  362. return "set()", instructions[0].start_offset
  363. return NULL_EXTENDED_OP
  364. def extended_format_BUILD_TUPLE(
  365. opc, instructions: List[Instruction]
  366. ) -> Tuple[str, Optional[int]]:
  367. arg_count = instructions[0].argval
  368. if arg_count == 0:
  369. return "tuple()", instructions[0].start_offset
  370. return extended_format_build_tuple_or_list(opc, instructions, "(", ")")
  371. def extended_format_COMPARE_OP(
  372. opc, instructions: List[Instruction]
  373. ) -> Tuple[str, Optional[int]]:
  374. return extended_format_infix_binary_op(
  375. opc,
  376. instructions,
  377. f" {instructions[0].argval} ",
  378. )
  379. def extended_format_DUP_TOP(
  380. opc, instructions: List[Instruction]
  381. ) -> Tuple[str, Optional[int]]:
  382. """Try to extract TOS value and show that surrounded in a "push() ".
  383. The trailing space at the used as a sentinal for `get_instruction_tos_str()`
  384. which tries to remove the push() part when the operand value string is needed.
  385. """
  386. # We add a space at the end as a sentinal to use in get_instruction_tos_str()
  387. if instructions[1].optype not in ['jrel', 'jabs']:
  388. return extended_format_unary_op(opc, instructions, "push(%s) ")
  389. else:
  390. return NULL_EXTENDED_OP
  391. def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]]:
  392. """call_function_inst should be a "CALL_FUNCTION" instruction. Look in
  393. `instructions` to see if we can find a method name. If not we'll
  394. return None.
  395. """
  396. # From opcode description: arg_count indicates the total number of
  397. # positional and keyword arguments.
  398. call_inst = instructions[0]
  399. arg_count = call_inst.argval
  400. s = ""
  401. arglist, arg_count, i = get_arglist(instructions, 0, arg_count)
  402. if arglist is None:
  403. return NULL_EXTENDED_OP
  404. assert i is not None
  405. if i >= len(instructions) - 1:
  406. return NULL_EXTENDED_OP
  407. fn_inst = instructions[i + 1]
  408. if fn_inst.opcode in opc.operator_set:
  409. start_offset = fn_inst.offset
  410. if instructions[1].opname == "MAKE_FUNCTION" and opc.version_tuple >= (3, 3):
  411. arglist[0] = instructions[2].argval
  412. fn_name = fn_inst.tos_str if fn_inst.tos_str else fn_inst.argrepr
  413. arglist.reverse()
  414. s = f'{fn_name}({", ".join(arglist)})'
  415. return s, start_offset
  416. return NULL_EXTENDED_OP
  417. def extended_format_IMPORT_FROM(
  418. opc, instructions: List[Instruction]
  419. ) -> Tuple[str, Optional[int]]:
  420. assert len(instructions) >= 2
  421. i = 1
  422. while instructions[i].opname == "STORE_NAME":
  423. i = get_instruction_index_from_offset(
  424. instructions[i].start_offset, instructions, 1
  425. )
  426. if i is None:
  427. return NULL_EXTENDED_OP
  428. module_name = get_instruction_arg(instructions[i])
  429. if module_name.startswith("import_module("):
  430. module_name = module_name[len("import_module(") : -1]
  431. return (
  432. f"from {module_name} import {instructions[0].argval}",
  433. instructions[1].start_offset,
  434. )
  435. def extended_format_IMPORT_NAME(
  436. opc, instructions: List[Instruction]
  437. ) -> Tuple[str, Optional[int]]:
  438. inst = instructions[0]
  439. return f"import_module({inst.argval})", inst.offset
  440. def extended_format_INPLACE_ADD(
  441. opc, instructions: List[Instruction]
  442. ) -> Tuple[str, Optional[int]]:
  443. return extended_format_infix_binary_op(opc, instructions, " += ")
  444. def extended_format_INPLACE_AND(
  445. opc, instructions: List[Instruction]
  446. ) -> Tuple[str, Optional[int]]:
  447. return extended_format_infix_binary_op(opc, instructions, " &= ")
  448. def extended_format_INPLACE_FLOOR_DIVIDE(
  449. opc, instructions
  450. ) -> Tuple[str, Optional[int]]:
  451. return extended_format_infix_binary_op(opc, instructions, " //= ")
  452. def extended_format_INPLACE_LSHIFT(
  453. opc, instructions: List[Instruction]
  454. ) -> Tuple[str, Optional[int]]:
  455. return extended_format_infix_binary_op(opc, instructions, " <<= ")
  456. def extended_format_INPLACE_MODULO(
  457. opc, instructions: List[Instruction]
  458. ) -> Tuple[str, Optional[int]]:
  459. return extended_format_infix_binary_op(opc, instructions, " %%= ")
  460. def extended_format_INPLACE_MULTIPLY(
  461. opc, instructions: List[Instruction]
  462. ) -> Tuple[str, Optional[int]]:
  463. return extended_format_infix_binary_op(opc, instructions, " *= ")
  464. def extended_format_INPLACE_OR(
  465. opc, instructions: List[Instruction]
  466. ) -> Tuple[str, Optional[int]]:
  467. return extended_format_infix_binary_op(opc, instructions, " |= ")
  468. def extended_format_INPLACE_POWER(
  469. opc, instructions: List[Instruction]
  470. ) -> Tuple[str, Optional[int]]:
  471. return extended_format_infix_binary_op(opc, instructions, " **= ")
  472. def extended_format_INPLACE_TRUE_DIVIDE(
  473. opc, instructions: List[Instruction]
  474. ) -> Tuple[str, Optional[int]]:
  475. return extended_format_infix_binary_op(opc, instructions, " /= ")
  476. def extended_format_INPLACE_RSHIFT(
  477. opc, instructions: List[Instruction]
  478. ) -> Tuple[str, Optional[int]]:
  479. return extended_format_infix_binary_op(opc, instructions, " >>= ")
  480. def extended_format_INPLACE_SUBTRACT(
  481. opc, instructions: List[Instruction]
  482. ) -> Tuple[str, Optional[int]]:
  483. return extended_format_infix_binary_op(opc, instructions, " -= ")
  484. def extended_format_INPLACE_XOR(
  485. opc, instructions: List[Instruction]
  486. ) -> Tuple[str, Optional[int]]:
  487. return extended_format_infix_binary_op(opc, instructions, " ^= ")
  488. def extended_format_IS_OP(
  489. opc, instructions: List[Instruction]
  490. ) -> Tuple[str, Optional[int]]:
  491. return extended_format_infix_binary_op(
  492. opc,
  493. instructions,
  494. f"%s {format_IS_OP(instructions[0].arg)} %s",
  495. )
  496. def extended_format_LOAD_BUILD_CLASS(
  497. opc, instructions: List[Instruction]
  498. ) -> Tuple[str, Optional[int]]:
  499. return "class", instructions[0].start_offset
  500. def extended_format_MAKE_FUNCTION_10_27(
  501. opc, instructions: List[Instruction]
  502. ) -> Tuple[str, int]:
  503. """
  504. instructions[0] should be a "MAKE_FUNCTION" or "MAKE_CLOSURE" instruction. TOS
  505. should have the function or closure name.
  506. This code works for Python versions up to and including 2.7.
  507. Python docs for MAKE_FUNCTION and MAKE_CLOSURE the was changed in 33, but testing
  508. shows that the change was really made in Python 3.0 or so.
  509. """
  510. # From opcode description: argc indicates the total number of positional
  511. # and keyword arguments. Sometimes the function name is in the stack arg
  512. # positions back.
  513. assert len(instructions) >= 2
  514. inst = instructions[0]
  515. assert inst.opname in ("MAKE_FUNCTION", "MAKE_CLOSURE")
  516. s = ""
  517. argc = instructions[0].argval
  518. if (argc >> 16) & 0x7FFF:
  519. # There is a tuple listing the parameter names for the annotations
  520. code_inst = instructions[2]
  521. else:
  522. code_inst = instructions[1]
  523. start_offset = code_inst.offset
  524. if code_inst.opname == "LOAD_CONST" and hasattr(code_inst.argval, "co_name"):
  525. # FIXME: we can probably much better than this.
  526. # But this is a start.
  527. signature = extended_function_signature(code_inst.argval)
  528. s += f"def {code_inst.argval.co_name}({signature}): " "..."
  529. return s, start_offset
  530. def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
  531. """call_method should be a "CALL_METHOD" instruction. Look in
  532. `instructions` to see if we can find a method name. If not we'll
  533. return None.
  534. """
  535. # From opcode description: arg_count indicates the total number of
  536. # positional and keyword arguments.
  537. call_method_inst = instructions[0]
  538. arg_count = call_method_inst.argval
  539. s = ""
  540. arglist, arg_count, first_arg = get_arglist(instructions, 0, arg_count)
  541. if first_arg is None or first_arg >= len(instructions) - 1:
  542. return NULL_EXTENDED_OP
  543. fn_inst = instructions[first_arg + 1]
  544. if fn_inst.opcode in opc.operator_set and arglist is not None:
  545. start_offset = fn_inst.offset
  546. if fn_inst.opname == "LOAD_METHOD":
  547. fn_name = fn_inst.tos_str if fn_inst.tos_str else fn_inst.argrepr
  548. arglist.reverse()
  549. s = f'{fn_name}({", ".join(arglist)})'
  550. return s, start_offset
  551. return NULL_EXTENDED_OP
  552. def extended_format_RAISE_VARARGS_older(
  553. opc, instructions: List[Instruction]
  554. ) -> Tuple[str, Optional[int]]:
  555. raise_inst = instructions[0]
  556. assert raise_inst.opname == "RAISE_VARARGS"
  557. argc = raise_inst.argval
  558. start_offset = raise_inst.start_offset
  559. if argc == 0:
  560. return "reraise", start_offset
  561. elif argc == 1:
  562. exception_name_inst = instructions[1]
  563. start_offset = exception_name_inst.start_offset
  564. exception_name = (
  565. exception_name_inst.tos_str
  566. if exception_name_inst.tos_str
  567. else exception_name_inst.argrepr
  568. )
  569. if exception_name is not None:
  570. return f"raise {exception_name}()", start_offset
  571. return format_RAISE_VARARGS_older(raise_inst.argval), start_offset
  572. def extended_format_RETURN_VALUE(
  573. opc, instructions: List[Instruction]
  574. ) -> Tuple[str, Optional[int]]:
  575. return extended_format_unary_op(opc, instructions, "return %s")
  576. def extended_format_UNARY_INVERT(
  577. opc, instructions: List[Instruction]
  578. ) -> Tuple[str, Optional[int]]:
  579. return extended_format_unary_op(opc, instructions, "~(%s)")
  580. def extended_format_UNARY_NEGATIVE(
  581. opc, instructions: List[Instruction]
  582. ) -> Tuple[str, Optional[int]]:
  583. return extended_format_unary_op(opc, instructions, "-(%s)")
  584. def extended_format_UNARY_NOT(
  585. opc, instructions: List[Instruction]
  586. ) -> Tuple[str, Optional[int]]:
  587. return extended_format_unary_op(opc, instructions, "not (%s)")
  588. def extended_function_signature(code) -> str:
  589. """
  590. Return some representation for a code object.
  591. """
  592. # FIXME: we can probably much better than this.
  593. # But this is a start.
  594. return "" if code.co_argcount == 0 else "..."
  595. def get_arglist(
  596. instructions: List[Instruction], i: int, arg_count: int
  597. ) -> Tuple[Optional[list], int, Optional[int]]:
  598. """
  599. For a variable-length instruction like BUILD_TUPLE, or
  600. a variable-name argument list, like CALL_FUNCTION
  601. accumulate and find the beginning of the list and return:
  602. * argument list
  603. * number of arguments parsed
  604. * the instruction index of the first instruction
  605. """
  606. arglist = []
  607. inst = None
  608. n = len(instructions) - 1
  609. to_do = arg_count
  610. while to_do > 0 and i < n:
  611. i += 1
  612. inst = instructions[i]
  613. if inst.is_jump_target:
  614. return None, -1, None
  615. arg = inst.tos_str if inst.tos_str else inst.argrepr
  616. if inst.opname == "CACHE":
  617. continue
  618. to_do -= 1
  619. if arg is not None:
  620. arglist.append(arg)
  621. elif not arg:
  622. return arglist, arg_count - to_do, i
  623. else:
  624. arglist.append("???")
  625. if inst.is_jump_target:
  626. i += 1
  627. break
  628. start_offset = inst.start_offset
  629. if start_offset is not None:
  630. j = i
  631. while j < len(instructions) - 1:
  632. j += 1
  633. inst2 = instructions[j]
  634. if inst2.offset == start_offset:
  635. inst = inst2
  636. if inst2.start_offset is None or inst2.start_offset == start_offset:
  637. i = j
  638. break
  639. else:
  640. start_offset = inst2.start_offset
  641. pass
  642. return arglist, arg_count - to_do, i
  643. def get_instruction_arg(inst: Instruction, argval=None) -> str:
  644. argval = inst.argrepr if argval is None else argval
  645. return inst.tos_str if inst.tos_str is not None else argval
  646. def get_instruction_tos_str(inst: Instruction) -> str:
  647. if inst.tos_str is not None:
  648. argval = inst.tos_str
  649. argval_without_push = re.match(r"^(?:push|copy)\((.+)\) ", argval)
  650. if argval_without_push:
  651. # remove surrounding "push(...) or copy(...)" string
  652. argval = argval_without_push.group(1)
  653. else:
  654. argval = inst.argrepr
  655. return argval
  656. def get_instruction_index_from_offset(
  657. target_offset: int, instructions: List[Instruction], start_index: int = 1
  658. ) -> Optional[int]:
  659. for i in range(start_index, len(instructions)):
  660. if instructions[i].offset == target_offset:
  661. return i
  662. return None
  663. def resolved_attrs(instructions: List[Instruction]) -> Tuple[str, int]:
  664. """ """
  665. # we can probably speed up using the "tos_str" field.
  666. resolved = []
  667. start_offset = 0
  668. for inst in instructions:
  669. name = inst.argrepr
  670. if name:
  671. if name[0] == "'" and name[-1] == "'":
  672. name = name[1:-1]
  673. else:
  674. name = ""
  675. resolved.append(name)
  676. if inst.opname != "LOAD_ATTR":
  677. start_offset = inst.offset
  678. break
  679. return ".".join(reversed(resolved)), start_offset
  680. def safe_repr(obj, max_len: int = 20) -> str:
  681. """
  682. String repr with length at most ``max_len``
  683. """
  684. try:
  685. result = repr(obj)
  686. except Exception:
  687. result = object.__repr__(obj)
  688. if len(result) > max_len:
  689. return result[:max_len] + "..."
  690. return result
  691. def short_code_repr(code) -> str:
  692. """
  693. A shortened string representation of a code object
  694. """
  695. if hasattr(code, "co_name"):
  696. return f"<code object {code.co_name}>"
  697. else:
  698. return f"<code object {code}>"
  699. def skip_cache(instructions: List[Instruction], i: int) -> int:
  700. """Python 3.11+ has CACHE instructions.
  701. Skip over those starting at index i and return
  702. the index of the first instruction that is not CACHE
  703. or return the length of the list if we can't find
  704. such an instruction.
  705. """
  706. n = len(instructions)
  707. while i < n and instructions[i].opname == "CACHE":
  708. i += 1
  709. return i
# fmt: off
# Dispatch table: maps an opcode name to the function in this module that
# produces its extended ("tos_str") description.
# The below are roughly Python 3.3 based. Python 3.11 removes some of these.
opcode_extended_fmt_base = {
    "BINARY_ADD":            extended_format_BINARY_ADD,
    "BINARY_AND":            extended_format_BINARY_AND,
    "BINARY_FLOOR_DIVIDE":   extended_format_BINARY_FLOOR_DIVIDE,
    "BINARY_MODULO":         extended_format_BINARY_MODULO,
    "BINARY_MULTIPLY":       extended_format_BINARY_MULTIPLY,
    "BINARY_RSHIFT":         extended_format_BINARY_RSHIFT,
    "BINARY_SUBSCR":         extended_format_BINARY_SUBSCR,
    "BINARY_SUBTRACT":       extended_format_BINARY_SUBTRACT,
    "BINARY_TRUE_DIVIDE":    extended_format_BINARY_TRUE_DIVIDE,
    "BINARY_LSHIFT":         extended_format_BINARY_LSHIFT,
    "BINARY_OR":             extended_format_BINARY_OR,
    "BINARY_POWER":          extended_format_BINARY_POWER,
    "BINARY_XOR":            extended_format_BINARY_XOR,
    "BUILD_CONST_KEY_MAP":   extended_format_BUILD_CONST_KEY_MAP,
    "BUILD_LIST":            extended_format_BUILD_LIST,
    "BUILD_SET":             extended_format_BUILD_SET,
    "BUILD_SLICE":           extended_format_BUILD_SLICE,
    "BUILD_TUPLE":           extended_format_BUILD_TUPLE,
    "CALL_FUNCTION":         extended_format_CALL_FUNCTION,
    "COMPARE_OP":            extended_format_COMPARE_OP,
    "DUP_TOP":               extended_format_DUP_TOP,
    "IMPORT_FROM":           extended_format_IMPORT_FROM,
    "IMPORT_NAME":           extended_format_IMPORT_NAME,
    "INPLACE_ADD":           extended_format_INPLACE_ADD,
    "INPLACE_AND":           extended_format_INPLACE_AND,
    "INPLACE_FLOOR_DIVIDE":  extended_format_INPLACE_FLOOR_DIVIDE,
    "INPLACE_LSHIFT":        extended_format_INPLACE_LSHIFT,
    "INPLACE_MODULO":        extended_format_INPLACE_MODULO,
    "INPLACE_MULTIPLY":      extended_format_INPLACE_MULTIPLY,
    "INPLACE_OR":            extended_format_INPLACE_OR,
    "INPLACE_POWER":         extended_format_INPLACE_POWER,
    "INPLACE_RSHIFT":        extended_format_INPLACE_RSHIFT,
    "INPLACE_SUBTRACT":      extended_format_INPLACE_SUBTRACT,
    "INPLACE_TRUE_DIVIDE":   extended_format_INPLACE_TRUE_DIVIDE,
    "INPLACE_XOR":           extended_format_INPLACE_XOR,
    "IS_OP":                 extended_format_IS_OP,
    # LOAD_ATTR and STORE_ATTR share one formatter.
    "LOAD_ATTR":             extended_format_ATTR,
    "LOAD_BUILD_CLASS":      extended_format_LOAD_BUILD_CLASS,
    "MAKE_FUNCTION":         extended_format_MAKE_FUNCTION_10_27,
    "RAISE_VARARGS":         extended_format_RAISE_VARARGS_older,
    "RETURN_VALUE":          extended_format_RETURN_VALUE,
    "STORE_ATTR":            extended_format_ATTR,
    # All of the simple name stores share one formatter.
    "STORE_DEREF":           extended_format_store_op,
    "STORE_FAST":            extended_format_store_op,
    "STORE_GLOBAL":          extended_format_store_op,
    "STORE_NAME":            extended_format_store_op,
    "STORE_SUBSCR":          extended_format_STORE_SUBSCR,
    "UNARY_INVERT":          extended_format_UNARY_INVERT,
    "UNARY_NEGATIVE":        extended_format_UNARY_NEGATIVE,
    "UNARY_NOT":             extended_format_UNARY_NOT,
}
# fmt: on