pysource.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282
  1. # Copyright (c) 2015-2024 by Rocky Bernstein
  2. # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
  3. # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  4. # Copyright (c) 1999 John Aycock
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. """Creates Python source code from an decompyle3 parse tree.
  19. The terminal symbols are CPython bytecode instructions. (See the
  20. python documentation under module "dis" for a list of instructions
  21. and what they mean).
  22. Upper levels of the grammar is a more-or-less conventional grammar for
  23. Python.
  24. """
  25. # The below is a bit long, but still it is somewhat abbreviated.
  26. # See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions.
  27. # for a more complete explanation, nicely marked up and with examples.
  28. #
  29. #
  30. # Semantic action rules for nonterminal symbols can be specified here by
  31. # creating a method prefaced with "n_" for that nonterminal. For
  32. # example, "n_exec_stmt" handles the semantic actions for the
  33. # "exec_stmt" nonterminal symbol. Similarly if a method with the name
  34. # of the nonterminal is suffixed with "_exit" it will be called after
  35. # all of its children are called.
  36. #
  37. # After a while writing methods this way, you'll find many routines which do similar
  38. # sorts of things, and soon you'll find you want a short notation to
  39. # describe rules and not have to create methods at all.
  40. #
  41. # So another way to specify a semantic rule for a nonterminal is via
  42. # either tables MAP_R, or MAP_DIRECT where the key is the
  43. # nonterminal name.
  44. #
  45. # These dictionaries use a printf-like syntax to direct substitution
  46. # from attributes of the nonterminal and its children.
  47. #
  48. # The rest of the below describes how table-driven semantic actions work
  49. # and gives a list of the format specifiers. The default() and
  50. # template_engine() methods implement most of the below.
  51. #
  52. # We allow for a couple of ways to interact with a node in a tree. So
  53. # step 1 after not seeing a custom method for a nonterminal is to
  54. # determine from what point of view tree-wise the rule is applied.
  55. # In the diagram below, N is a nonterminal name, and K also a nonterminal
  56. # name but the one used as a key in the table.
  57. # we show where those are with respect to each other in the
  58. # parse tree for N.
  59. #
  60. #
  61. #          N&K               N
  62. #       / | ... \        / | ... \
  63. #      O  O      O      O  O      K
  64. #
  65. #
  66. #      TABLE_DIRECT      TABLE_R
  67. #
  68. # The default table is TABLE_DIRECT. By far, most rules work this way.
  69. #
  70. # The key K is then extracted from the subtree and used to find one
  71. # of the tables, T listed above. The result after applying T[K] is
  72. # a format string and arguments (a la printf()) for the formatting
  73. # engine.
  74. #
  75. # Escapes in the format string are:
  76. #
  77. # %c evaluate/traverse the node recursively. Its argument is a single
  78. # integer or tuple representing a node index.
  79. # If a tuple is given, the first item is the node index while
  80. # the second item is a string giving the node/nonterminal name.
  81. # This name will be checked at runtime against the node type.
  82. #
  83. # %p like %c but sets the operator precedence.
  84. # Its argument then is a tuple indicating the node
  85. # index and the precedence value, an integer. If 3 items are given,
  86. # the second item is the nonterminal name and the precedence is given last.
  87. #
  88. # %C evaluate/traverse children recursively, with sibling children separated by the
  89. # given string. It needs a 3-tuple: a starting node, the maximum
  90. # value of an end node, and a string to be inserted between sibling children
  91. #
  92. # %, Append ',' if last %C only printed one item. This is mostly for tuples
  93. # on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints
  94. # other tuples. The specifier takes no arguments
  95. #
  96. # %P same as %C but sets operator precedence. Its argument is a 4-tuple:
  97. # the node low and high indices, the separator (a string), and the
  98. # precedence value (an integer).
  99. #
  100. # %D Same as `%C`, but for left-recursive lists like kwargs where the list
  101. # goes to epsilon at the beginning. It needs a 3-tuple: a starting node, the
  102. # maximum value of an end node, and a string to be inserted between
  103. # sibling children. If we were to use `%C` an extra separator with an
  104. # epsilon would appear at the beginning.
  105. #
  106. # %| Insert spaces to the current indentation level. Takes no arguments.
  107. #
  108. # %+ increase current indentation level. Takes no arguments.
  109. #
  110. # %- decrease current indentation level. Takes no arguments.
  111. #
  112. # %{EXPR} Python eval(EXPR) in context of node. Takes no arguments
  113. #
  114. # %[N]{EXPR} Python eval(EXPR) in context of node[N]. Takes no arguments
  115. #
  116. # %[N]{%X} evaluate/recurse on child node[N], using specifier %X.
  117. # %X can be one of the above, e.g. %c, %p, etc. Takes the arguments
  118. # that the specifier uses.
  119. #
  120. # %% literal '%'. Takes no arguments.
  121. #
  122. #
  123. # The '%' may optionally be followed by a number (C) in square
  124. # brackets, which makes the template_engine walk down to N[C] before
  125. # evaluating the escape code.
  126. import sys
  127. from io import StringIO
  128. from typing import Optional
  129. from spark_parser import GenericASTTraversal
  130. from xdis import COMPILER_FLAG_BIT, IS_PYPY, iscode
  131. from xdis.version_info import PYTHON_VERSION_TRIPLE
  132. import decompyle3.parsers.main as python_parser
  133. import decompyle3.parsers.parse_heads as heads
  134. from decompyle3.parsers.main import get_python_parser
  135. from decompyle3.parsers.treenode import SyntaxTree
  136. from decompyle3.scanner import Code, get_scanner
  137. from decompyle3.scanners.tok import Token
  138. from decompyle3.semantics.check_ast import checker
  139. from decompyle3.semantics.consts import (
  140. INDENT_PER_LEVEL,
  141. LINE_LENGTH,
  142. MAP,
  143. MAP_DIRECT,
  144. NAME_MODULE,
  145. NO_PARENTHESIS_EVER,
  146. NONE,
  147. PASS,
  148. PRECEDENCE,
  149. TAB,
  150. TABLE_R,
  151. escape,
  152. )
  153. from decompyle3.semantics.customize import customize_for_version
  154. from decompyle3.semantics.gencomp import ComprehensionMixin
  155. from decompyle3.semantics.helper import find_globals_and_nonlocals, is_lambda_mode
  156. from decompyle3.semantics.n_actions import NonterminalActions
  157. from decompyle3.semantics.parser_error import ParserError
  158. from decompyle3.semantics.transform import TreeTransform
  159. from decompyle3.show import maybe_show_tree
  160. from decompyle3.util import better_repr
  # Default switches for the parser/grammar debugging output.
  PARSER_DEFAULT_DEBUG = dict(
      rules=False,
      transition=False,
      reduce=False,
      errorstack="full",
      context=True,
      dups=False,
  )

  # By default the tree is shown neither before nor after transformation.
  TREE_DEFAULT_DEBUG = dict(before=False, after=False)

  # Aggregate defaults. "tree" refers to the TREE_DEFAULT_DEBUG object itself,
  # while "grammar" holds a separate shallow copy of PARSER_DEFAULT_DEBUG.
  DEFAULT_DEBUG_OPTS = dict(
      asm=False,
      tree=TREE_DEFAULT_DEBUG,
      grammar=PARSER_DEFAULT_DEBUG.copy(),
  )
  class SourceWalkerError(Exception):
      """Exception that carries its message in the ``errmsg`` attribute.

      ``str()`` of the exception is the string form of ``errmsg``.
      """

      def __init__(self, errmsg):
          # Keep the standard Exception state (``args``) populated as well.
          super().__init__(errmsg)
          self.errmsg = errmsg

      def __str__(self):
          # __str__ must return a str; coerce so that a non-string message
          # (for example another exception object) does not raise TypeError.
          return str(self.errmsg)
  180. class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
  181. """
  182. Class to traverse a Parse Tree of the bytecode instruction built from parsing to
  183. produce some sort of source text.
  184. The Parse tree may be turned an Abstract Syntax tree as an intermediate step.
  185. """
  186. stacked_params = ("f", "indent", "is_lambda", "_globals")
  def __init__(
      self,
      version: tuple,
      out,
      scanner,
      showast=TREE_DEFAULT_DEBUG,
      debug_parser=PARSER_DEFAULT_DEBUG,
      compile_mode="exec",
      is_pypy=IS_PYPY,
      linestarts=None,
      tolerate_errors=False,
  ):
      """`version' is the Python version of the Python dialect
      of both the syntax tree and language we should produce.

      `out' is IO-like file pointer to where the output should go. It
      would have a getvalue() method.

      `scanner' is a method to call when we need to scan tokens. Sometimes
      in producing output we will run across further tokens that need
      to be scanned.

      `showast' is a dictionary with the keys "before" and "after"; when the
      entry for a phase is true, the syntax tree for that phase is printed
      (see maybe_show_tree()).

      `debug_parser' is a dictionary of parser debug switches. It is passed
      to the parser, and a copy of it is kept in `self.debug_parser'.

      `compile_mode` is either `exec`, `single` or `lambda`.

      For `lambda`, the grammar that can be used in lambda
      expressions is used. Otherwise, it is the compile mode that
      was used to create the Syntax Tree and specifies a grammar
      variant within a Python version to use.

      `is_pypy` should be True if the Syntax Tree was generated for PyPy.

      `linestarts` is a dictionary of line number to bytecode offset. This
      can sometimes assist in determining which kind of source-code construct
      to use when there is ambiguity. When omitted (or None), a fresh empty
      dictionary is used for this instance.

      `tolerate_errors` records whether decompilation should continue when
      there are errors.
      """
      GenericASTTraversal.__init__(self, ast=None)

      # A mutable default argument would be shared by every instance that
      # did not pass `linestarts`; create a per-instance dictionary instead.
      if linestarts is None:
          linestarts = {}

      self.scanner = scanner
      params = {"f": out, "indent": ""}
      self.version = version
      self.p = get_python_parser(
          version,
          debug_parser=debug_parser,
          compile_mode=compile_mode,
          is_pypy=is_pypy,
      )

      # Initialize p_lambda on demand
      self.p_lambda = None

      self.treeTransform = TreeTransform(
          version=self.version,
          show_ast=showast,
          str_with_template=self.str_with_template,
      )

      # FIXME: have p.insts update in a better way
      # modularity is broken here
      self.p.insts = scanner.insts

      self.ERROR = None
      self.ast_errors = []
      self.classes = []
      self.compile_mode = compile_mode
      self.currentclass = None
      self.debug_parser = dict(debug_parser)
      self.is_module = False
      self.is_pypy = is_pypy
      self.line_number = 1
      self.linestarts = linestarts
      self.mod_globs = set()
      self.name = None
      self.offset2inst_index = scanner.offset2inst_index
      self.param_stack = []
      self.params = params
      self.pending_newlines = 0
      self.prec = NO_PARENTHESIS_EVER
      self.return_none = False
      self.showast = showast
      self.source_linemap = {}

      # This is in Python 2.6 on. It changes the way
      # strings get interpreted. See n_LOAD_CONST
      self.FUTURE_UNICODE_LITERALS = False

      # Sometimes we may want to continue decompiling when there are errors
      # and sometimes not
      self.tolerate_errors = tolerate_errors

      # If we are in a 3.6+ format string, we may need an
      # extra level of parens when seeing a lambda. We also use
      # this to understand whether or not to add the "f" prefix.
      # When not "None" it is a string of the last nonterminal
      # that started the format string
      self.in_format_string = None

      # hide_internal suppresses displaying the additional instructions that sometimes
      # exist in code but were not written in the source code.
      # An example is:
      # __module__ = __name__
      self.hide_internal = True

      customize_for_version(self, is_pypy, version)
  def maybe_show_tree(self, tree, phase):
      """Show `tree` under a banner if `self.showast` enables `phase`.

      `phase` is looked up as a key of `self.showast`. The banner reads
      "parse_tree" for the phase "before" and "transformed_tree" for any
      other phase. Nothing is written when the phase is not enabled.
      """
      if not self.showast.get(phase, False):
          return

      phase_name = "transformed_tree" if phase != "before" else "parse_tree"
      banner = f"\n# ---- {phase_name}:\n"
      self.println(banner + " ")
      # This is the module-level maybe_show_tree() helper, not this method.
      maybe_show_tree(self, tree)
  def str_with_template(self, tree):
      """Write the str_with_template1() rendering of `tree`, followed by a
      newline, to sys.stdout.
      """
      output = sys.stdout
      rendering = self.str_with_template1(tree, "", None)
      output.write(rendering)
      output.write("\n")
  def str_with_template1(self, tree, indent: str, sibNum=None) -> str:
      """Return a multi-line description of `tree` and, recursively, its children.

      The line for `tree` holds its kind, optionally prefixed by the sibling
      number `sibNum`, followed by the child count (when there is more than
      one child), its PRECEDENCE entry, what it was transformed by, and the
      template found for it via self._get_mapping(), whenever those apply.
      Each child is placed on its own line using a deeper `indent`.
      """
      header = str(tree.kind)
      if sibNum is not None:
          header = "%2d. %s" % (sibNum, header)

      # Children are only numbered when there is more than one of them.
      number_children = len(tree) > 1
      if number_children:
          header += f" ({len(tree)})"

      if tree in PRECEDENCE:
          header += f", precedence {PRECEDENCE[tree]}"

      # First item of the mapping is the table; the remaining items are the
      # indices to follow from `tree` to reach the node used as the table key.
      mapping = self._get_mapping(tree)
      table = mapping[0]
      key_node = tree
      for step in mapping[1:]:
          key_node = key_node[step]

      if tree.transformed_by is not None and tree.transformed_by:
          header += f" (transformed by {tree.transformed_by})"

      if key_node.kind in table:
          header += ": %s" % str(table[key_node.kind])

      lines = indent + header
      indent += " "
      for position, child in enumerate(tree):
          label = position if number_children else None
          if hasattr(child, "__repr1__"):
              rendered = self.str_with_template1(child, indent, label)
          else:
              inst = child.format(line_prefix="L.")
              if inst.startswith("\n"):
                  # Nuke leading \n
                  inst = inst[1:]
              if number_children:
                  rendered = indent + "%2d. %s" % (position, inst)
              else:
                  rendered = indent + inst
          lines += "\n" + rendered
      return lines
  def indent_if_source_nl(self, line_number: int, indent_spaces: str):
      """Start a new indented output line when `line_number` differs from
      `self.line_number`.

      The text written is a newline, `indent_spaces`, and INDENT_PER_LEVEL
      without its last character. Returns `self.line_number` in either case.
      """
      same_line = line_number == self.line_number
      if not same_line:
          continuation = indent_spaces + INDENT_PER_LEVEL[:-1]
          self.write("\n" + continuation)
      return self.line_number
  # The attributes below are views onto entries of the current `self.params`
  # dictionary: reading, assigning and deleting the attribute reads, sets and
  # deletes the dictionary entry of the same name.
  f = property(
      fget=lambda walker: walker.params["f"],
      fset=lambda walker, value: walker.params.__setitem__("f", value),
      fdel=lambda walker: walker.params.__delitem__("f"),
      doc=None,
  )

  indent = property(
      fget=lambda walker: walker.params["indent"],
      fset=lambda walker, value: walker.params.__setitem__("indent", value),
      fdel=lambda walker: walker.params.__delitem__("indent"),
      doc=None,
  )

  is_lambda = property(
      fget=lambda walker: walker.params["is_lambda"],
      fset=lambda walker, value: walker.params.__setitem__("is_lambda", value),
      fdel=lambda walker: walker.params.__delitem__("is_lambda"),
      doc=None,
  )

  _globals = property(
      fget=lambda walker: walker.params["_globals"],
      fset=lambda walker, value: walker.params.__setitem__("_globals", value),
      fdel=lambda walker: walker.params.__delitem__("_globals"),
      doc=None,
  )
  def set_pos_info(self, node):
      """Copy `node.linestart` into `self.line_number` when `node` has a
      truthy `linestart` attribute; otherwise leave `self.line_number` alone.
      """
      if not hasattr(node, "linestart"):
          return
      if node.linestart:
          self.line_number = node.linestart
  def preorder(self, node=None):
      """Run the inherited preorder() on `node`, then record the node's
      position via set_pos_info().
      """
      super().preorder(node)
      self.set_pos_info(node)
  def indent_more(self, indent=TAB):
      """Append `indent` (TAB by default) to the current indentation string."""
      self.indent = self.indent + indent
  def indent_less(self, indent=TAB):
      """Drop as many trailing characters from the current indentation string
      as `indent` (TAB by default) is long.
      """
      width = len(indent)
      self.indent = self.indent[:-width]
  def traverse(self, node, indent=None, is_lambda=False):
      """Render `node` into a fresh buffer and return the resulting text.

      While `node` is walked, `self.params` is replaced by a new dictionary
      with empty "_globals" and "_nonlocals", a new StringIO as "f", and the
      given `indent` (defaulting to the current indentation) and `is_lambda`.
      The previous parameters and the previous count of pending newlines are
      put back afterwards, including when the walk raises, so that a failure
      inside a nested traversal does not leave this object writing to the
      nested buffer.
      """
      self.param_stack.append(self.params)
      if indent is None:
          indent = self.indent
      saved_pending_newlines = self.pending_newlines
      self.pending_newlines = 0
      self.params = {
          "_globals": {},
          "_nonlocals": {},  # Python 3 has nonlocal
          "f": StringIO(),
          "indent": indent,
          "is_lambda": is_lambda,
      }
      try:
          self.preorder(node)
          # Flush newlines that were still pending at the end of the walk.
          self.f.write("\n" * self.pending_newlines)
          return self.f.getvalue()
      finally:
          self.params = self.param_stack.pop()
          self.pending_newlines = saved_pending_newlines
  def write(self, *data):
      """Write the concatenated string forms of `data` to `self.f`, deferring
      newlines at either end.

      Leading newlines raise `self.pending_newlines` to at least their count
      and are removed from the text. If the text is only newlines, nothing
      else happens. Otherwise pending newlines are written out first, then
      the text without its trailing newlines; those trailing newlines become
      the new pending count. No arguments, or a single empty string, is a
      no-op.
      """
      if len(data) == 0:
          return
      if len(data) == 1 and data[0] == "":
          return

      text = "".join(str(piece) for piece in data)

      body = text.lstrip("\n")
      leading = len(text) - len(body)
      if leading:
          self.pending_newlines = max(self.pending_newlines, leading)
          if not body:
              # The text was nothing but newlines; keep them pending.
              return

      if self.pending_newlines > 0:
          self.f.write("\n" * self.pending_newlines)
          self.pending_newlines = 0

      for char in reversed(body):
          if char != "\n":
              break
          self.pending_newlines += 1

      if self.pending_newlines:
          body = body[: -self.pending_newlines]
      self.f.write(body)
  def println(self, *data):
      """Write `data` (unless it is empty or a single empty string) and make
      sure at least one newline is pending afterwards.
      """
      only_empty_string = len(data) == 1 and data[0] == ""
      if data and not only_empty_string:
          self.write(*data)
      if self.pending_newlines < 1:
          self.pending_newlines = 1
  def is_return_none(self, node):
      """Tell whether `node` is a return of the constant None.

      That is the case when node[0] is a "return_expr" whose first child is
      an "expr" whose first child is a LOAD_CONST with `pattr` None, or when
      `node` equals the "return" tree built from the NONE constant followed
      by a RETURN_VALUE token.
      """
      # Is there a better way?
      returned = node[0]
      if returned == "return_expr":
          expr = returned[0]
          if expr == "expr":
              constant = expr[0]
              if constant == "LOAD_CONST" and constant.pattr is None:
                  return True

      # FIXME: should the SyntaxTree expression be folded into
      # the global RETURN_NONE constant?
      return_none_tree = SyntaxTree(
          "return", [SyntaxTree("return_expr", [NONE]), Token("RETURN_VALUE")]
      )
      return node == return_none_tree
  def pp_tuple(self, tup):
      """Pretty print a tuple.

      Items are written via better_repr(), separated by ", ". Once the
      running column count exceeds LINE_LENGTH, the next item starts on a new
      line, indented to just past the column where the tuple began. A
      one-item tuple gets a trailing ", ".
      """
      current_line = self.f.getvalue().rsplit("\n", 1)[-1]
      column = len(current_line) + 1
      continuation = " " * column

      self.write("(")
      separator = ""
      for element in tup:
          self.write(separator)
          column += len(separator)
          text = better_repr(element)
          column += len(text)
          self.write(text)
          if column > LINE_LENGTH:
              column = 0
              separator = ",\n" + continuation
          else:
              separator = ", "

      if len(tup) == 1:
          self.write(", ")
      self.write(")")
  def print_super_classes(self, node):
      """Write the base-class list held in a "tuple" node.

      Every child except the last is traversed and the results are joined
      with ", ". Parentheses are written around the list when there is at
      least one such child or when `self.version` is later than (2, 4).
      Nodes that are not a "tuple" produce no output.
      """
      if not (node == "tuple"):
          return

      bases = node[:-1]
      # Not an old-style pre-2.2 class
      parenthesize = len(bases) > 0 or self.version > (2, 4)

      if parenthesize:
          self.write("(")

      separator = ""
      for base in bases:
          rendered = self.traverse(base)
          self.write(separator, rendered)
          separator = ", "

      if parenthesize:
          self.write(")")
  def print_super_classes3(self, node):
      """Write the parenthesized base-class/keyword list of a class definition.

      For an "expr" node, its first child is traversed and written in
      parentheses, unless that child is a LOAD_STR, in which case nothing is
      written. A "kwarg" node is delegated to template_engine(). Any other
      node must end in a CALL_FUNCTION* or CALL_METHOD* token; its arguments
      are written separated by ", ", with "name=value" used for the keyword
      arguments of a CALL_FUNCTION_KW* call.
      """
      last = len(node) - 1

      if node.kind == "expr":
          if node == "dict_comp_async":
              # Handled this condition above.
              pass
          else:
              if node[0] == "LOAD_STR":
                  return
              rendered = self.traverse(node[0])
              self.write("(")
              self.write(rendered)
          self.write(")")
          return

      if node == "kwarg":
          self.template_engine(("(%[0]{attr}=%c)", 1), node)
          return

      call_op = node[last]
      opname = call_op.kind
      assert opname.startswith("CALL_FUNCTION") or opname.startswith(
          "CALL_METHOD"
      )

      kwargs = None
      positional = 0
      if opname.startswith("CALL_FUNCTION_KW"):
          # 3.6+ starts doing this
          kwargs = node[last - 1].attr
          assert isinstance(kwargs, tuple)
          i = last - (len(kwargs) + 1)
          positional = 1 + last - call_op.attr
      else:
          start = last - 2
          i = start
          for i in range(start, 0, -1):
              if node[i].kind not in ["expr", "call", "LOAD_CLASSNAME"]:
                  break

          if i == start:
              return
          i += 2

      separator = ""
      self.write("(")
      if kwargs:
          # Last arg is tuple of keyword values: omit
          stop = last - 1

          # 3.6+ does this
          for index in range(positional, i):
              self.write(separator)
              rendered = self.traverse(node[index])
              self.write("%s" % rendered)
              separator = ", "

          for kw_position, index in enumerate(range(i, stop)):
              self.write(separator)
              rendered = self.traverse(node[index])
              self.write("%s=%s" % (kwargs[kw_position], rendered))
              separator = ", "
      else:
          stop = last
          for index in range(i, stop):
              rendered = self.traverse(node[index])
              self.write(separator, rendered)
              separator = ", "

      self.write(")")
  def template_engine(self, entry, startnode):
      """The format template interpretation engine. See the comment at the
      beginning of this module for how we interpret format
      specifications such as %c, %C, and so on.

      `entry[0]` is the format string; the remaining items of `entry` are
      the arguments, consumed left to right by the specifiers that take one.
      `startnode` is the node that the specifiers are applied to.
      """
      fmt = entry[0]
      arg = 1
      pos = 0

      match = escape.search(fmt)
      while match:
          pos = match.end()
          self.write(match.group("prefix"))

          spec = match.group("type") or "{"
          node = startnode
          child = match.group("child")
          if child:
              node = node[int(child)]

          if spec == "%":
              self.write("%")
          elif spec == "+":
              self.line_number += 1
              self.indent_more()
          elif spec == "-":
              self.line_number += 1
              self.indent_less()
          elif spec == "|":
              self.line_number += 1
              self.write(self.indent)
          elif spec == ",":
              # Used mostly on the LHS of an assignment
              # BUILD_TUPLE_n is pretty printed and may take care of other uses.
              if node.kind in ("unpack", "unpack_w_parens") and node[0].attr == 1:
                  self.write(",")
          elif spec == "c":
              index = entry[arg]
              if isinstance(index, tuple):
                  child_index, expected = index[0], index[1]
                  if isinstance(expected, str):
                      assert (
                          node[child_index] == expected
                      ), "at %s[%d], expected '%s' node; got '%s'" % (
                          node.kind,
                          arg,
                          expected,
                          node[child_index].kind,
                      )
                  else:
                      assert (
                          node[child_index] in expected
                      ), "at %s[%d], expected to be in '%s' node; got '%s'" % (
                          node.kind,
                          arg,
                          expected,
                          node[child_index].kind,
                      )
                  index = child_index

              assert isinstance(
                  index, int
              ), "at %s[%d], %s should be int or tuple" % (
                  node.kind,
                  arg,
                  type(index),
              )

              try:
                  target = node[index]
              except IndexError:
                  raise RuntimeError(
                      f"\nExpanding '{node.kind}' in template '{entry}[{arg}]':\n"
                      f"{index} is invalid; has only {len(node)} entries\n"
                  )
              self.preorder(target)
              arg += 1
          elif spec == "p":
              saved_prec = self.prec
              tup = entry[arg]
              assert isinstance(tup, tuple)
              if len(tup) == 3:
                  (index, nonterm_name, self.prec) = tup
                  if isinstance(nonterm_name, str):
                      assert (
                          node[index] == nonterm_name
                      ), "at %s[%d], expected '%s' node; got '%s'" % (
                          node.kind,
                          arg,
                          nonterm_name,
                          node[index].kind,
                      )
                  else:
                      assert node[tup[0]] in tup[1], (
                          f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' "
                          f"node; got '{node[tup[0]].kind}'"
                      )
              else:
                  assert len(tup) == 2
                  (index, self.prec) = tup

              self.preorder(node[index])
              self.prec = saved_prec
              arg += 1
          elif spec == "C":
              low, high, sep = entry[arg]
              children = node[low:high]
              count = len(children)
              for ordinal, subnode in enumerate(children, 1):
                  self.preorder(subnode)
                  if ordinal < count:
                      self.write(sep)
              arg += 1
          elif spec == "D":
              low, high, sep = entry[arg]
              children = node[low:high]
              count = len(children)
              for ordinal, subnode in enumerate(children, 1):
                  # Children without content produce neither text nor separator.
                  if len(subnode) > 0:
                      self.preorder(subnode)
                      if ordinal < count:
                          self.write(sep)
              arg += 1
          elif spec == "x":
              # This code is only used in fragments
              assert isinstance(entry[arg], tuple)
              arg += 1
          elif spec == "P":
              saved_prec = self.prec
              low, high, sep, self.prec = entry[arg]
              children = node[low:high]
              count = len(children)
              for ordinal, subnode in enumerate(children, 1):
                  self.preorder(subnode)
                  if ordinal < count:
                      self.write(sep)
              self.prec = saved_prec
              arg += 1
          elif spec == "{":
              expr = match.group("expr")

              # Line mapping stuff
              # NOTE(review): the check is on node.current_line_number while the
              # key used is self.current_line_number -- confirm this is intended.
              if (
                  hasattr(node, "linestart")
                  and node.linestart
                  and hasattr(node, "current_line_number")
              ):
                  self.source_linemap[self.current_line_number] = node.linestart

              if expr[0] == "%":
                  # %[N]{%X}: apply the inner specifier to the selected node.
                  index = entry[arg]
                  self.template_engine((expr, index), node)
                  arg += 1
              else:
                  # The expression comes from the format string and is
                  # evaluated with the node's attributes as its namespace.
                  scope = node.__dict__
                  self.write(eval(expr, scope, scope))

          match = escape.search(fmt, pos)

      # Whatever follows the last escape is written as is.
      self.write(fmt[pos:])
  def default(self, node):
      """Apply the table-driven template for `node`, if there is one.

      self._get_mapping() supplies the table followed by the indices that
      lead from `node` to the node whose kind is the table key. When that
      kind is in the table, its entry is run through template_engine() and
      self.prune() is called.
      """
      table, *path = self._get_mapping(node)

      key_node = node
      for step in path:
          key_node = key_node[step]

      if key_node.kind not in table:
          return
      self.template_engine(table[key_node.kind], node)
      self.prune()
  def customize(self, customize):
      """
      Special handling for opcodes, such as those that take a variable number
      of arguments -- we add a new entry for each in TABLE_R.

      `customize` maps opcode names to a value; names that already have a
      TABLE_R entry are skipped, as are names that are not a CALL_METHOD* or
      CALL_FUNCTION* variant handled below.
      """
      for opname, value in list(customize.items()):
          if opname in TABLE_R:
              continue
          op = opname[: opname.rfind("_")]

          if opname.startswith("CALL_METHOD") or opname.startswith(
              "CALL_FUNCTION_KW"
          ):
              # CALL_METHOD happens in PyPy and Python 3.7+
              TABLE_R[opname] = ("%c(%P)", 0, (1, -1, ", ", 100))
              continue

          if op == "CALL_FUNCTION":
              TABLE_R[opname] = (
                  "%c(%P)",
                  (0, "expr"),
                  (1, -1, ", ", PRECEDENCE["yield"] - 1),
              )
              continue

          if op not in (
              "CALL_FUNCTION_VAR",
              "CALL_FUNCTION_VAR_KW",
              "CALL_FUNCTION_KW",
          ):
              # handled by n_dict:
              # if op == 'BUILD_SLICE': TABLE_R[k] = ('%C' , (0,-1,':'))
              # handled by n_list:
              # if op == 'BUILD_LIST': TABLE_R[k] = ('[%C]' , (0,-1,', '))
              # elif op == 'BUILD_TUPLE': TABLE_R[k] = ('(%C%,)', (0,-1,', '))
              continue

          # FIXME: handle everything in customize.
          # Right now, some of this is here, and some in that.
          if value == 0:
              template_str = "%c(%C"  # '%C' is a dummy here ...
              p2 = (0, 0, None)  # because of the None in this
          else:
              template_str = "%c(%C, "
              p2 = (1, -2, ", ")

          if op == "CALL_FUNCTION_VAR":
              if self.version == (3, 5):
                  # Python 3.5 only puts optional args (the VAR part)
                  # the lowest down the stack
                  if template_str == "%c(%C, ":
                      entry = ("%c(*%C, %c)", 0, p2, -2)
                  else:
                      entry = ("%c(*%C)", 0, (1, 100, ""))
              elif self.version == (3, 4):
                  # CALL_FUNCTION_VAR's top element of the stack contains
                  # the variable argument list
                  if value == 0:
                      template_str = "%c(*%c)"
                      entry = (template_str, 0, -2)
                  else:
                      template_str = "%c(%C, *%c)"
                      entry = (template_str, 0, p2, -2)
              else:
                  template_str += "*%c)"
                  entry = (template_str, 0, p2, -2)
          elif op == "CALL_FUNCTION_KW":
              template_str += "**%c)"
              entry = (template_str, 0, p2, -2)
          elif op == "CALL_FUNCTION_VAR_KW":
              template_str += "*%c, **%c)"
              # Python 3.5 only puts optional args (the VAR part)
              # the lowest down the stack
              positional_count = value & 0xFF  # positional parameters
              if self.version == (3, 5) and positional_count == 0:
                  if p2[2]:
                      p2 = (2, -2, ", ")
                  entry = (template_str, 0, p2, 1, -2)
              else:
                  if p2[2]:
                      p2 = (1, -3, ", ")
                  entry = (template_str, 0, p2, -3, -2)
          else:
              assert False, "Unhandled CALL_FUNCTION %s" % op

          TABLE_R[opname] = entry
  def build_class(self, code):
      """Dump class definition, doc string and class body.

      `code` must be a code object. Its tokens are parsed with build_ast();
      a leading docstring is written out first, and a leading NAME_MODULE
      statement and `__qualname__` assignment are removed from the tree when
      `self.hide_internal` is set. "global" and "nonlocal" declarations come
      next, and the rest is produced by gen_source().
      """
      assert iscode(code)
      self.classes.append(self.currentclass)
      code = Code(code, self.scanner, self.currentclass)

      indent = self.indent
      tree = self.build_ast(code._tokens, code._customize, code)

      # save memory by deleting no-longer-used structures
      code._tokens = None

      assert tree == "stmts"

      if tree[0] == "docstring":
          self.println(self.traverse(tree[0]))
          del tree[0]

      first_stmt = tree[0]
      try:
          if first_stmt == NAME_MODULE and self.hide_internal:
              del tree[0]
              first_stmt = tree[0]
      except Exception:
          # Best effort: keep the statement found so far.
          pass

      # Python 3.4+ has constants like 'cmp_to_key.<locals>.K'
      # which are not simple classes like the < 3 case.
      have_qualname = False
      try:
          if (
              first_stmt == "assign"
              and first_stmt[0][0] == "LOAD_STR"
              and first_stmt[1] == "store"
              and first_stmt[1][0] == Token("STORE_NAME", pattr="__qualname__")
          ):
              have_qualname = True
      except Exception:
          # Best effort: treat a statement of unexpected shape as "no qualname".
          pass

      if have_qualname and self.hide_internal:
          del tree[0]

      global_names, nonlocal_names = find_globals_and_nonlocals(
          tree, set(), set(), code, self.version
      )

      # Add "global" declaration statements at the top
      # of the function
      for name in sorted(global_names):
          self.println(indent, "global ", name)

      for name in sorted(nonlocal_names):
          self.println(indent, "nonlocal ", name)

      # gen_source() is run with self.name saved and put back afterwards.
      saved_name = self.name
      self.gen_source(tree, code.co_name, code._customize)
      self.name = saved_name

      # save memory by deleting no-longer-used structures
      code._tokens = None
      code._customize = None

      self.classes.pop(-1)
  def gen_source(
      self,
      tree,
      name,
      customize,
      is_lambda=False,
      returnNone=False,
      debug_opts=DEFAULT_DEBUG_OPTS,
  ):
      """Convert a parse tree to Python source code and emit it.

      Args:
          tree: parse tree to render; an empty tree is rendered as ``pass``.
          name: stored in ``self.name`` while the tree is rendered.
          customize: passed to ``self.customize`` before traversal.
          is_lambda: forwarded to ``self.traverse``; when true the text is
              emitted with ``self.write`` instead of ``self.println``.
          returnNone: stored in ``self.return_none`` while rendering.
          debug_opts: stored in ``self.debug_opts`` (not restored afterwards).

      Side effects:
          For a non-empty tree the rendered text is kept in ``self.text``.
          ``self.name`` and ``self.return_none`` are put back to their
          previous values on exit, including when rendering raises.
      """
      saved_return_none = self.return_none
      self.return_none = returnNone
      saved_name = self.name
      self.name = name
      self.debug_opts = debug_opts

      try:
          # if code would be empty, append 'pass'
          if len(tree) == 0:
              self.println(self.indent, "pass")
          else:
              self.customize(customize)
              self.text = self.traverse(tree, is_lambda=is_lambda)

              # In a formatted string using "lambda", we should not add "\n".
              # For example in:
              #   f'{(lambda x:x)("8")!r}'
              # Adding a "\n" after "lambda x: x" will give an error message:
              #   SyntaxError: f-string expression part cannot include a backslash
              # So avoid that.
              if self.in_format_string or is_lambda:
                  printfn = self.write
              else:
                  printfn = self.println
              printfn(self.text)
      finally:
          # Restore even on failure so that an exception raised while
          # rendering does not leave this walker carrying the callee's name
          # and return-None setting.
          self.name = saved_name
          self.return_none = saved_return_none
  def build_ast(
      self,
      tokens,
      customize,
      code,
      is_lambda=False,
      noneInNames=False,
      is_top_level_module=False,
  ) -> GenericASTTraversal:
      """Parse ``tokens`` and return the transformed syntax tree.

      ``tokens`` is modified in place: lambda code gets its return tokens
      renamed and a ``LAMBDA_MARKER`` appended; other code may lose a
      trailing "return None" pair or gain a ``RETURN_LAST`` token.  Returns
      ``PASS`` when no tokens are left to parse.

      Raises:
          ParserError: wrapping the parser error or ``AssertionError``
              caught while parsing, together with the token list.
      """
      # FIXME: DRY with fragments.py

      # assert isinstance(tokens[0], Token)

      if is_lambda:
          # Lambda code uses its own spellings of the return tokens.
          lambda_kinds = {
              "RETURN_END_IF": "RETURN_END_IF_LAMBDA",
              "RETURN_VALUE": "RETURN_VALUE_LAMBDA",
          }
          for tok in tokens:
              lambda_kind = lambda_kinds.get(tok.kind)
              if lambda_kind is not None:
                  tok.kind = lambda_kind
          tokens.append(Token("LAMBDA_MARKER", optype="pseudo"))

          try:
              # The lambda parser is created on first use and then reused.
              if self.p_lambda is None:
                  self.p_lambda = get_python_parser(
                      self.version,
                      self.debug_parser,
                      compile_mode="lambda",
                      is_pypy=self.is_pypy,
                  )
              lambda_parser = self.p_lambda
              lambda_parser.insts = self.scanner.insts
              lambda_parser.offset2inst_index = self.scanner.offset2inst_index
              parse_tree = python_parser.parse(
                  lambda_parser, tokens, customize, is_lambda
              )
              self.customize(customize)
          # NOTE(review): ``heads`` is not referenced anywhere else in the
          # visible code -- confirm it is imported; the other parse path
          # below catches plain ``ParserError``.
          except (heads.ParserError, AssertionError) as e:
              # NOTE(review): this reports self.p's debug settings, not the
              # lambda parser's -- confirm that is intended.
              raise ParserError(e, tokens, self.p.debug["reduce"])

          return self.treeTransform.transform(parse_tree, code, self.println)

      # The bytecode for the end of the main routine has a "return
      # None". However, you can't issue a "return" statement in
      # main. So as the old cigarette slogan goes: I'd rather switch
      # (the token stream) than fight (with the grammar to not emit
      # "return None").
      if (
          self.hide_internal
          and len(tokens) >= 2
          and not noneInNames
          and tokens[-1].kind in ("RETURN_VALUE", "RETURN_VALUE_LAMBDA")
      ):
          # Python 3.4's classes can add a "return None" which is
          # invalid syntax.
          load_const = tokens[-2]
          if load_const.kind == "LOAD_CONST" and (
              is_top_level_module or load_const.pattr is None
          ):
              del tokens[-2:]
          else:
              tokens.append(Token("RETURN_LAST"))

      if len(tokens) == 0:
          return PASS

      # Build a parse tree from a tokenized and massaged disassembly.
      try:
          # FIXME: have p.insts update in a better way
          # Modularity is broken here.
          saved_insts = self.p.insts
          self.p.insts = self.scanner.insts
          self.p.offset2inst_index = self.scanner.offset2inst_index
          self.p.opc = self.scanner.opc
          parse_tree = python_parser.parse(
              self.p, tokens, customize, is_lambda=is_lambda
          )
          # Only reached when parsing succeeded.
          self.p.insts = saved_insts
      except (ParserError, AssertionError) as e:
          raise ParserError(e, tokens, self.p.debug["reduce"])

      checker(parse_tree, False, self.ast_errors)

      self.customize(customize)
      transformed = self.treeTransform.transform(parse_tree, code, self.println)

      del parse_tree  # Save memory
      return transformed
  @classmethod
  def _get_mapping(cls, node):
      """Return the ``MAP`` entry for ``node``, or ``MAP_DIRECT`` if absent."""
      mapping = MAP.get(node, MAP_DIRECT)
      return mapping
  def code_deparse(
      co,
      out=sys.stdout,
      version: Optional[tuple] = None,
      debug_opts=DEFAULT_DEBUG_OPTS,
      code_objects=None,
      compile_mode="exec",
      is_pypy=IS_PYPY,
      walker=SourceWalker,
      start_offset: int = 0,
      stop_offset: int = -1,
  ) -> Optional[SourceWalker]:
      """Ingest and deparse the code object ``co``.

      Args:
          co: code object to deparse (checked with ``iscode``).
          out: output stream given to the walker; ``None`` means
              ``sys.stdout``.
          version: version to deparse for; ``None`` selects
              ``PYTHON_VERSION_TRIPLE``.
          debug_opts: mapping read for the keys ``"asm"``, ``"grammar"`` and
              ``"tree"``.  Each key is optional; a missing ``"asm"`` falls
              back to the value in ``DEFAULT_DEBUG_OPTS``.
          code_objects: mapping forwarded to ``scanner.ingest``.  ``None``
              (the default) means a fresh empty dict for this call.
          compile_mode: selects the grammar start symbol that the parse
              result is checked against.
          is_pypy: forwarded to the scanner and the walker.
          walker: class (or factory) used to build the deparser object.
          start_offset: when > 0, tokens before the first token with an
              integer offset >= this value are dropped.
          stop_offset: when > -1, tokens from the first one whose
              ``off2int()`` is >= this value onwards are dropped.

      Returns:
          The walker, whose ``text`` holds the generated source, or ``None``
          when ``build_ast`` produced no tree.

      Raises:
          AssertionError: ``co`` is not a code object, the tree does not
              start with the expected grammar symbol, or nonlocals were
              found outside lambda/comprehension modes.
          SourceWalkerError: grammar-rule errors were recorded, or the
              walker flagged a parse error.
      """
      assert iscode(co)

      if out is None:
          out = sys.stdout
      if version is None:
          version = PYTHON_VERSION_TRIPLE
      if code_objects is None:
          # A per-call dict rather than one default instance shared by
          # every call.
          code_objects = {}

      # "grammar" and "tree" are already optional below; treat "asm" the
      # same way so that a partial debug_opts dict does not raise KeyError.
      show_asm = debug_opts.get("asm", DEFAULT_DEBUG_OPTS["asm"])

      scanner = get_scanner(version, is_pypy=is_pypy, show_asm=show_asm)

      tokens, customize = scanner.ingest(
          co, code_objects=code_objects, show_asm=show_asm
      )

      if start_offset > 0:
          for i, t in enumerate(tokens):
              # If t.offset is a string, we want to skip this.
              if isinstance(t.offset, int) and t.offset >= start_offset:
                  tokens = tokens[i:]
                  break

      if stop_offset > -1:
          for i, t in enumerate(tokens):
              # In contrast to the test for start_offset If t.offset is
              # a string, we want to extract the integer offset value.
              if t.off2int() >= stop_offset:
                  tokens = tokens[:i]
                  break

      debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG))

      # Build Syntax Tree from disassembly.
      linestarts = dict(scanner.opc.findlinestarts(co))
      deparsed = walker(
          version,
          out,
          scanner,
          showast=debug_opts.get("tree", TREE_DEFAULT_DEBUG),
          debug_parser=debug_parser,
          compile_mode=compile_mode,
          is_pypy=is_pypy,
          linestarts=linestarts,
      )

      is_top_level_module = co.co_name == "<module>"
      is_lambda = is_lambda_mode(compile_mode)

      if compile_mode == "eval":
          deparsed.hide_internal = False
      deparsed.compile_mode = compile_mode
      deparsed.ast = deparsed.build_ast(
          tokens,
          customize,
          co,
          is_lambda=is_lambda,
          is_top_level_module=is_top_level_module,
      )

      # XXX workaround for profiling
      if deparsed.ast is None:
          return None

      # Grammar start symbol expected for each non-lambda compile mode;
      # modes that are not listed are not checked.
      if is_lambda:
          expected_start = "lambda_start"
      else:
          expected_start = {
              "eval": "expr_start",
              "expr": "expr_start",
              "exec": "stmts",
              "single": "single_start",
          }.get(compile_mode)

      if expected_start:
          assert deparsed.ast == expected_start, (
              f"Should have parsed grammar start to '{expected_start}'; "
              f"got: {deparsed.ast.kind}"
          )

      # save memory
      del tokens

      deparsed.mod_globs, nonlocals = find_globals_and_nonlocals(
          deparsed.ast, set(), set(), co, version
      )

      deparsed.is_module = compile_mode not in (
          "dictcomp",
          "gencomp",
          "genexpr",
          "lambda",
          "listcomp",
          "setcomp",
      )
      if deparsed.is_module:
          assert not nonlocals

      deparsed.FUTURE_UNICODE_LITERALS = (
          COMPILER_FLAG_BIT["FUTURE_UNICODE_LITERALS"] & co.co_flags != 0
      )

      # What we've been waiting for: Generate source from Syntax Tree!
      deparsed.gen_source(
          deparsed.ast,
          name=co.co_name,
          customize=customize,
          is_lambda=is_lambda,
          debug_opts=debug_opts,
      )

      for g in sorted(deparsed.mod_globs):
          deparsed.write("# global %s ## Warning: Unused global\n" % g)

      if deparsed.ast_errors:
          deparsed.write("# NOTE: have internal decompilation grammar errors.\n")
          deparsed.write("# Use -T option to show full context.")
          for err in deparsed.ast_errors:
              deparsed.write(err)
          raise SourceWalkerError("Deparsing hit an internal grammar-rule bug")

      if deparsed.ERROR:
          raise SourceWalkerError("Deparsing stopped due to parse error")

      return deparsed
  def deparse_code2str(
      code,
      out=sys.stdout,
      version=None,
      debug_opts=DEFAULT_DEBUG_OPTS,
      code_objects=None,
      compile_mode="exec",
      is_pypy=IS_PYPY,
      walker=SourceWalker,
      start_offset: int = 0,
      stop_offset: int = -1,
  ) -> str:
      """Return the deparsed text for a Python code object.

      ``out`` is where any intermediate output for assembly or tree output
      will be sent; ``None`` means ``sys.stdout``.  ``code_objects`` defaults
      to a fresh empty dict for each call.  All remaining arguments are
      passed through to ``code_deparse`` unchanged.

      Returns:
          The ``text`` of the walker returned by ``code_deparse``, or the
          string ``"# deparse failed"`` when ``code_deparse`` returns
          ``None``.
      """
      if out is None:
          out = sys.stdout
      if code_objects is None:
          # Do not let every call share one default dict instance.
          code_objects = {}

      deparsed = code_deparse(
          code,
          out,
          version,
          debug_opts,
          code_objects=code_objects,
          compile_mode=compile_mode,
          is_pypy=is_pypy,
          walker=walker,
          start_offset=start_offset,
          stop_offset=stop_offset,
      )
      if deparsed is None:
          return "# deparse failed"
      return deparsed.text
  if __name__ == "__main__":
      # Self-test: decompile the code object of the function below and print
      # the result.  The function's own body is the decompiler's input here,
      # so its docstring, local name and trailing "return" are kept as is.

      def deparse_test(co):
          """This is a docstring"""
          s = deparse_code2str(co)
          # Variant that also turns on assembly output and sets tree options:
          # s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}})
          print(s)
          return

      own_code = deparse_test.__code__
      deparse_test(own_code)