| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496 |
- # Copyright (c) 2015-2024 by Rocky Bernstein
- # Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
- # Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
- # Copyright (c) 1999 John Aycock
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- """Creates Python source code from an uncompyle6 parse tree.
- The terminal symbols are CPython bytecode instructions. (See the
- python documentation under module "dis" for a list of instructions
- and what they mean).
- Upper levels of the grammar are a more-or-less conventional grammar for
- Python.
- """
- # The below is a bit long, but still it is somewhat abbreviated.
- # See https://github.com/rocky/python-uncompyle6/wiki/Table-driven-semantic-actions.
- # for a more complete explanation, nicely marked up and with examples.
- #
- #
- # Semantic action rules for nonterminal symbols can be specified here by
- # creating a method prefaced with "n_" for that nonterminal. For
- # example, "n_exec_stmt" handles the semantic actions for the
- # "exec_stmt" nonterminal symbol. Similarly if a method with the name
- # of the nonterminal is suffixed with "_exit" it will be called after
- # all of its children are called.
- #
- # After a while writing methods this way, you'll find many routines which do similar
- # sorts of things, and soon you'll find you want a short notation to
- # describe rules and not have to create methods at all.
- #
- # So another way to specify a semantic rule for a nonterminal is via
- # either tables MAP_R, or MAP_DIRECT where the key is the
- # nonterminal name.
- #
- # These dictionaries use a printf-like syntax to direct substitution
- # from attributes of the nonterminal and its children..
- #
- # The rest of the below describes how table-driven semantic actions work
- # and gives a list of the format specifiers. The default() and
- # template_engine() methods implement most of the below.
- #
- # We allow for a couple of ways to interact with a node in a tree. So
- # step 1 after not seeing a custom method for a nonterminal is to
- # determine from what point of view tree-wise the rule is applied.
- # In the diagram below, N is a nonterminal name, and K also a nonterminal
- # name but the one used as a key in the table.
- # we show where those are with respect to each other in the
- # parse tree for N.
- #
- #
- # N&K N
- # / | ... \ / | ... \
- # O O O O O K
- #
- #
- # TABLE_DIRECT TABLE_R
- #
- # The default table is TABLE_DIRECT. By far, most rules work this way.
- #
- # The key K is then extracted from the subtree and used to find one
- # of the tables, T listed above. The result after applying T[K] is
- # a format string and arguments (a la printf()) for the formatting
- # engine.
- #
- # Escapes in the format string are:
- #
- # %c evaluate/traverse the node recursively. Its argument is a single
- # integer or tuple representing a node index.
- # If a tuple is given, the first item is the node index while
- # the second item is a string giving the node/nonterminal name.
- # This name will be checked at runtime against the node type.
- #
- # %p like %c but sets the operator precedence.
- # Its argument then is a tuple indicating the node
- # index and the precedence value, an integer. If 3 items are given,
- # the second item is the nonterminal name and the precedence is given last.
- #
- # %C evaluate/traverse children recursively, with sibling children separated by the
- # given string. It needs a 3-tuple: a starting node, the maximum
- # value of an end node, and a string to be inserted between sibling children
- #
- # %, Append ',' if last %C only printed one item. This is mostly for tuples
- # on the LHS of an assignment statement since BUILD_TUPLE_n pretty-prints
- # other tuples. The specifier takes no arguments
- #
- # %P same as %C but sets operator precedence. Its argument is a 4-tuple:
- # the node low and high indices, the separator (a string), and the
- # precedence value (an integer).
- #
- # %D Same as `%C`, but for left-recursive lists like kwargs where the list goes
- # to epsilon at the beginning. It needs a 3-tuple: a starting node, the
- # maximum value of an end node, and a string to be inserted between
- # sibling children. If we were to use `%C` an extra separator with an
- # epsilon would appear at the beginning.
- #
- # %| Insert spaces to the current indentation level. Takes no arguments.
- #
- # %+ increase current indentation level. Takes no arguments.
- #
- # %- decrease current indentation level. Takes no arguments.
- #
- # %{EXPR} Python eval(EXPR) in context of node. Takes no arguments
- #
- # %[N]{EXPR} Python eval(EXPR) in context of node[N]. Takes no arguments
- #
- # %[N]{%X} evaluate/recurse on child node[N], using specifier %X.
- # %X can be one of the above, e.g. %c, %p, etc. Takes the arguments
- # that the specifier uses.
- #
- # %% literal '%'. Takes no arguments.
- #
- #
- # The '%' may optionally be followed by a number (C) in square
- # brackets, which makes the template_engine walk down to N[C] before
- # evaluating the escape code.
- import sys
- from io import StringIO
- from typing import Optional
- from spark_parser import GenericASTTraversal
- from xdis import COMPILER_FLAG_BIT, IS_PYPY, iscode
- from xdis.version_info import PYTHON_VERSION_TRIPLE
- from uncompyle6.parser import get_python_parser, parse
- from uncompyle6.parsers.treenode import SyntaxTree
- from uncompyle6.scanner import Code, get_scanner
- from uncompyle6.scanners.tok import Token
- from uncompyle6.semantics.check_ast import checker
- from uncompyle6.semantics.consts import (
- ASSIGN_TUPLE_PARAM,
- INDENT_PER_LEVEL,
- LINE_LENGTH,
- NAME_MODULE,
- NO_PARENTHESIS_EVER,
- NONE,
- PASS,
- PRECEDENCE,
- RETURN_LOCALS,
- RETURN_NONE,
- TAB,
- TABLE_DIRECT,
- TABLE_R,
- escape,
- )
- from uncompyle6.semantics.customize import customize_for_version
- from uncompyle6.semantics.gencomp import ComprehensionMixin
- from uncompyle6.semantics.helper import (
- find_globals_and_nonlocals,
- is_lambda_mode,
- print_docstring,
- )
- from uncompyle6.semantics.make_function1 import make_function1
- from uncompyle6.semantics.make_function2 import make_function2
- from uncompyle6.semantics.make_function3 import make_function3
- from uncompyle6.semantics.make_function36 import make_function36
- from uncompyle6.semantics.n_actions import NonterminalActions
- from uncompyle6.semantics.parser_error import ParserError
- from uncompyle6.semantics.transform import TreeTransform, is_docstring
- from uncompyle6.show import maybe_show_tree
- from uncompyle6.util import better_repr
def unicode(x):
    """Return ``x`` unchanged (an identity function named ``unicode``)."""
    value = x
    return value
# Default debug switches for the grammar parser.
PARSER_DEFAULT_DEBUG = dict(
    rules=False,
    transition=False,
    reduce=False,
    errorstack="full",
    context=True,
    dups=False,
)

# Default switches for showing the tree before/after transformation.
TREE_DEFAULT_DEBUG = dict(before=False, after=False)

# Combined defaults.  "tree" shares the TREE_DEFAULT_DEBUG object while
# "grammar" is a separate copy of PARSER_DEFAULT_DEBUG.
DEFAULT_DEBUG_OPTS = dict(
    asm=False,
    tree=TREE_DEFAULT_DEBUG,
    grammar=dict(PARSER_DEFAULT_DEBUG),
)
class SourceWalkerError(Exception):
    """Exception carrying a single error message.

    The message is available as ``errmsg`` and is what ``str()`` returns.
    """

    def __init__(self, errmsg):
        # Hand the message to Exception as well, so that ``args`` is
        # populated and the exception can be copied and pickled.
        super().__init__(errmsg)
        self.errmsg = errmsg

    def __str__(self):
        # __str__ must return a str even when a non-string message was given.
        return str(self.errmsg)
- class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
- """
- Class to traverse a Parse Tree of the bytecode instruction built from parsing to
- produce some sort of source text.
- The Parse tree may be turned into an Abstract Syntax tree as an intermediate step.
- """
- stacked_params = ("f", "indent", "is_lambda", "_globals")
def __init__(
    self,
    version: tuple,
    out,
    scanner,
    showast=TREE_DEFAULT_DEBUG,
    debug_parser=PARSER_DEFAULT_DEBUG,
    compile_mode="exec",
    is_pypy=IS_PYPY,
    linestarts: Optional[dict] = None,
    tolerate_errors=False,
):
    """`version' is the Python version of the Python dialect
    of both the syntax tree and language we should produce.

    `out' is IO-like file pointer to where the output should go. It
    would have a getvalue() method.

    `scanner' is a method to call when we need to scan tokens. Sometimes
    in producing output we will run across further tokens that need
    to be scanned. Its `offset2inst_index' and `insts' attributes are
    copied onto the walker.

    `showast' is a dictionary of flags; its entries (for example
    "before" and "after") are looked up with .get() to decide whether
    the syntax tree is shown.

    `debug_parser' is a dictionary of parser debug settings. It is
    copied before being passed to the parser and before being stored.

    `compile_mode` is either `exec`, `single` or `lambda`.
    For `lambda`, the grammar that can be used in lambda
    expressions is used. Otherwise, it is the compile mode that
    was used to create the Syntax Tree and specifies a grammar
    variant within a Python version to use.

    `is_pypy` should be True if the Syntax Tree was generated for PyPy.

    `linestarts` is a dictionary of line number to bytecode offset. This
    can sometimes assist in determining which kind of source-code construct
    to use when there is ambiguity. When omitted (or None), a fresh empty
    dictionary is used for this instance.

    `tolerate_errors` records whether decompilation should continue
    when there are errors.
    """
    GenericASTTraversal.__init__(self, ast=None)

    self.scanner = scanner
    params = {"f": out, "indent": ""}
    self.version = version
    self.p = get_python_parser(
        version,
        debug_parser=dict(debug_parser),
        compile_mode=compile_mode,
        is_pypy=is_pypy,
    )

    self.ERROR = None
    self.ast_errors = []
    self.classes = []
    self.compile_mode = compile_mode
    self.currentclass = None
    self.debug_parser = dict(debug_parser)
    self.is_pypy = is_pypy
    self.linemap = {}
    self.line_number = 1
    # A literal {} default would be one dictionary shared by every
    # walker created without an explicit `linestarts`.
    self.linestarts = {} if linestarts is None else linestarts
    self.mod_globs = set()
    self.name = None
    self.offset2inst_index = scanner.offset2inst_index
    self.param_stack = []
    self.params = params
    self.pending_newlines = 0
    self.prec = NO_PARENTHESIS_EVER
    self.return_none = False
    self.showast = showast
    self.treeTransform = TreeTransform(version=self.version, show_ast=showast)

    # FIXME: have p.insts update in a better way
    # modularity is broken here
    self.insts = scanner.insts

    # Initialize p_lambda on demand
    self.p_lambda = None

    # This is in Python 2.6 on. It changes the way
    # strings get interpreted. See n_LOAD_CONST
    self.FUTURE_UNICODE_LITERALS = False

    # Sometimes we may want to continue decompiling when there are errors
    # and sometimes not
    self.tolerate_errors = tolerate_errors

    # If we are in a 3.6+ format string, we may need an
    # extra level of parens when seeing a lambda. We also use
    # this to understand whether or not to add the "f" prefix.
    # When not "None" it is a string of the last nonterminal
    # that started the format string
    self.in_format_string = None

    # hide_internal suppresses displaying the additional instructions that sometimes
    # exist in code but were not written in the source code.
    # An example is:
    # __module__ = __name__
    self.hide_internal = True

    # Per-instance copies of the tables, so that customization of this
    # walker does not rebind entries in the shared module-level tables.
    self.TABLE_DIRECT = TABLE_DIRECT.copy()
    self.TABLE_R = TABLE_R.copy()
    self.MAP_DIRECT = (self.TABLE_DIRECT,)
    self.MAP_R = (self.TABLE_R, -1)

    self.MAP = {
        "stmt": self.MAP_R,
        "call": self.MAP_R,
        "delete": self.MAP_R,
        "store": self.MAP_R,
    }

    customize_for_version(self, is_pypy, version)
    return
def maybe_show_tree(self, tree, phase):
    """Print transform banners and, if enabled for ``phase``, show ``tree``.

    The "before" and "after" flags of ``self.showast`` each trigger their
    banner independently of ``phase``. The tree itself is shown only when
    ``self.showast`` has a truthy entry for ``phase``.
    """
    flags = self.showast
    if flags.get("before", False):
        self.println("\n---- end before transform\n" + " ")
    if flags.get("after", False):
        self.println("\n---- begin after transform\n" + " ")
    if flags.get(phase, False):
        # Inside a method body this name resolves to the module-level
        # maybe_show_tree function, not to this method.
        maybe_show_tree(self, tree)
def str_with_template(self, ast):
    """Write the template-annotated rendering of ``ast`` to standard output.

    The text comes from ``str_with_template1`` and is followed by a newline.
    """
    out = sys.stdout
    rendered = self.str_with_template1(ast, "", None)
    out.write(rendered)
    out.write("\n")
def str_with_template1(self, ast, indent, sibNum=None) -> str:
    """Return an indented, multi-line description of ``ast`` and its children.

    The first line holds the node kind, optionally prefixed by ``sibNum``,
    followed by the child count (when more than one), the precedence (when
    ``ast`` is in PRECEDENCE), transformation information, and the template
    table entry that applies to the node, if any. Children follow one per
    line with deeper indentation; they are numbered when there is more
    than one.
    """
    header = str(ast.kind)
    if sibNum is not None:
        header = "%2d. %s" % (sibNum, header)

    enumerate_children = len(ast) > 1
    if enumerate_children:
        header += f" ({len(ast)})"

    if ast in PRECEDENCE:
        header += f", precedence {PRECEDENCE[ast]}"

    # Find the table and the key node the same way default() does.
    mapping = self._get_mapping(ast)
    table = mapping[0]
    key = ast
    for step in mapping[1:]:
        key = key[step]

    transformed_by = ast.transformed_by
    if transformed_by is True:
        header += " transformed"
    elif transformed_by is not None:
        header += " transformed by %s" % transformed_by

    if key.kind in table:
        header += ": %s" % str(table[key.kind])

    lines = [indent + header]
    child_indent = indent + " "
    for position, node in enumerate(ast):
        if hasattr(node, "__repr1__"):
            # Child has __repr1__: describe it recursively.
            label = position if enumerate_children else None
            child = self.str_with_template1(node, child_indent, label)
        else:
            # Otherwise use the child's own format() output.
            inst = node.format(line_prefix="L.")
            if inst.startswith("\n"):
                # Nuke leading \n
                inst = inst[1:]
            if enumerate_children:
                child = child_indent + "%2d. %s" % (position, inst)
            else:
                child = child_indent + inst
        lines.append(child)
    return "\n".join(lines)
def indent_if_source_nl(self, line_number: int, indent_spaces: str):
    """Start a new output line when the current line number has moved on.

    If ``line_number`` differs from ``self.line_number``, write a newline
    followed by ``indent_spaces`` and INDENT_PER_LEVEL minus its last
    character. Returns ``self.line_number`` in either case.
    """
    if line_number == self.line_number:
        return self.line_number
    self.write("\n" + indent_spaces + INDENT_PER_LEVEL[:-1])
    return self.line_number
# Each property below exposes one entry of the current ``self.params``
# dictionary as an attribute: getting, setting and deleting the attribute
# reads, stores and removes the key of the same name.
f = property(
    fget=lambda s: s.params["f"],
    fset=lambda s, x: s.params.__setitem__("f", x),
    fdel=lambda s: s.params.__delitem__("f"),
    doc=None,
)

indent = property(
    fget=lambda s: s.params["indent"],
    fset=lambda s, x: s.params.__setitem__("indent", x),
    fdel=lambda s: s.params.__delitem__("indent"),
    doc=None,
)

is_lambda = property(
    fget=lambda s: s.params["is_lambda"],
    fset=lambda s, x: s.params.__setitem__("is_lambda", x),
    fdel=lambda s: s.params.__delitem__("is_lambda"),
    doc=None,
)

_globals = property(
    fget=lambda s: s.params["_globals"],
    fset=lambda s, x: s.params.__setitem__("_globals", x),
    fdel=lambda s: s.params.__delitem__("_globals"),
    doc=None,
)
def set_pos_info(self, node):
    """Take over ``node.linestart`` as the current line number.

    Nothing happens when ``node`` has no ``linestart`` attribute or when
    that attribute is falsy.
    """
    linestart = getattr(node, "linestart", None)
    if linestart:
        self.line_number = linestart
def preorder(self, node=None):
    """Run the inherited preorder traversal on ``node``, then record its line position."""
    inherited = super(SourceWalker, self)
    inherited.preorder(node)
    self.set_pos_info(node)
def indent_more(self, indent=TAB):
    """Append ``indent`` to the current indentation string."""
    self.indent = self.indent + indent
def indent_less(self, indent=TAB):
    """Drop ``len(indent)`` characters from the end of the current indentation.

    An empty ``indent`` leaves the indentation unchanged.
    """
    width = len(indent)
    # With width == 0 the slice [:-0] is [:0], which would erase the
    # whole indentation instead of removing nothing.
    if width:
        self.indent = self.indent[:-width]
def traverse(self, node, indent=None, is_lambda=False):
    """Render ``node`` into a fresh buffer and return the resulting text.

    The current output parameters are pushed on ``self.param_stack`` and
    replaced by a new set (own StringIO buffer, the given ``indent`` and
    ``is_lambda``, empty global/nonlocal tables) while ``node`` is walked
    with ``self.preorder``. ``indent`` defaults to the current
    indentation. Newlines still pending at the end of the walk are
    appended to the returned text.

    The previous parameters and pending-newline count are restored even
    when the walk raises, so an error in a nested traversal does not
    leave the walker writing into the nested buffer.
    """
    if indent is None:
        indent = self.indent
    saved_newlines = self.pending_newlines

    self.param_stack.append(self.params)
    self.pending_newlines = 0
    self.params = {
        "_globals": {},
        "_nonlocals": {},  # Python 3 has nonlocal
        "f": StringIO(),
        "indent": indent,
        "is_lambda": is_lambda,
    }
    try:
        self.preorder(node)
        self.f.write("\n" * self.pending_newlines)
        return self.f.getvalue()
    finally:
        self.params = self.param_stack.pop()
        self.pending_newlines = saved_newlines
def write(self, *data):
    """Write the concatenated ``str()`` of ``data`` to the current buffer.

    Newlines at the start and end of the text are not written directly.
    Leading newlines raise ``self.pending_newlines`` to at least their
    count, and a text consisting only of newlines does nothing else.
    Pending newlines are written out before the text, and the text's
    trailing newlines become the new pending count.

    Calling with no arguments, or with a single empty string, is a no-op.
    """
    if not data or (len(data) == 1 and data[0] == ""):
        return
    text = "".join(str(piece) for piece in data)

    body = text.lstrip("\n")
    leading = len(text) - len(body)
    if leading:
        self.pending_newlines = max(self.pending_newlines, leading)
        if not body:
            # Nothing but newlines: just remember them.
            return

    if self.pending_newlines > 0:
        self.f.write("\n" * self.pending_newlines)
        self.pending_newlines = 0

    trailing = len(body) - len(body.rstrip("\n"))
    self.pending_newlines += trailing
    if self.pending_newlines:
        body = body[: -self.pending_newlines]
    self.f.write(body)
def println(self, *data):
    """Write ``data`` (if any) and make sure at least one newline is pending.

    Nothing is written when called without arguments or with a single
    empty string.
    """
    only_empty_string = len(data) == 1 and data[0] == ""
    if data and not only_empty_string:
        self.write(*data)
    if self.pending_newlines < 1:
        self.pending_newlines = 1
def is_return_none(self, node):
    """Tell whether ``node`` is a return of the constant ``None``.

    The node matches when it has the shape
    ``return_expr -> expr -> LOAD_CONST`` with a ``pattr`` of None.
    For versions after 2.6 it also matches when it equals the tree
    ``return(return_expr(NONE), RETURN_VALUE)``.
    """
    # Is there a better way?
    loads_none = (
        node[0] == "return_expr"
        and node[0][0] == "expr"
        and node[0][0][0] == "LOAD_CONST"
        and node[0][0][0].pattr is None
    )
    if loads_none or self.version <= (2, 6):
        return loads_none
    # FIXME: should the SyntaxTree expression be folded into
    # the global RETURN_NONE constant?
    return node == SyntaxTree(
        "return", [SyntaxTree("return_expr", [NONE]), Token("RETURN_VALUE")]
    )
def pp_tuple(self, tup):
    """Pretty print a tuple.

    Items are written with ``better_repr`` and separated by commas. Once
    the running width exceeds LINE_LENGTH, the next item starts on a new
    line aligned just past the opening parenthesis. A one-element tuple
    gets its trailing comma.
    """
    current_line = self.f.getvalue().rpartition("\n")[2]
    width = len(current_line) + 1
    continuation = " " * width
    self.write("(")
    separator = ""
    for item in tup:
        self.write(separator)
        text = better_repr(item, self.version)
        width += len(separator) + len(text)
        self.write(text)
        if width > LINE_LENGTH:
            width = 0
            separator = "," + "\n" + continuation
        else:
            separator = "," + " "
    if len(tup) == 1:
        self.write(", ")
    self.write(")")
- # Python changes make function this much that we need at least 3 different routines,
- # and probably more in the future.
def make_function(self, node, is_lambda, nested=1, code_node=None, annotate=None):
    """Dispatch to the make_function routine for ``self.version``.

    Versions up to 1.2, up to 2.7, 3.0 through 3.5, and 3.6 onward each
    have their own routine; any other version does nothing. ``annotate``
    is accepted but not passed on.
    """
    version = self.version
    if version <= (1, 2):
        builder = make_function1
    elif version <= (2, 7):
        builder = make_function2
    elif (3, 0) <= version < (3, 6):
        builder = make_function3
    elif version >= (3, 6):
        builder = make_function36
    else:
        return
    builder(self, node, is_lambda, nested, code_node)
def print_super_classes(self, node):
    """Write the parenthesized base-class list held in a "tuple" node.

    Every child except the last is rendered as a base class, separated by
    ", ". Parentheses are omitted only when there are no bases and the
    version is 2.4 or lower. Nodes other than "tuple" are ignored.
    """
    if not (node == "tuple"):
        return

    bases = node[:-1]
    # Not an old-style pre-2.2 class
    parenthesize = len(bases) > 0 or self.version > (2, 4)
    if parenthesize:
        self.write("(")

    separator = ""
    for base in bases:
        rendered = self.traverse(base)
        self.write(separator, rendered)
        separator = ", "

    if parenthesize:
        self.write(")")
def print_super_classes3(self, node):
    """Write the parenthesized base-class/keyword list of a class definition.

    ``node`` is either an "expr" (a single base taken from ``node[0]``),
    a "kwarg", or a node whose last child is a CALL_FUNCTION* token with
    the class arguments in front of it. Nothing is written when no
    argument is found, or when an "expr" node starts with LOAD_STR.
    """
    last = len(node) - 1
    pos_index = 0

    if node.kind == "expr":
        if node[0] == "LOAD_STR":
            return
        value = self.traverse(node[0])
        self.write("(")
        self.write(value)
        self.write(")")
        return

    if node == "kwarg":
        self.template_engine(("(%[0]{attr}=%c)", 1), node)
        return

    kwargs = None
    assert node[last].kind.startswith("CALL_FUNCTION")

    if node[last].kind.startswith("CALL_FUNCTION_KW"):
        if self.is_pypy:
            # FIXME: this doesn't handle positional and keyword args
            # properly. Need to do something more like that below
            # in the non-PYPY 3.6 case.
            self.template_engine(("(%[0]{attr}=%c)", 1), node[last - 1])
            return
        kwargs = node[last - 1].attr

        assert isinstance(kwargs, tuple)
        arg_index = last - (len(kwargs) + 1)
        pos_index = 1 + last - node[last].attr
    else:
        # Walk backwards over the trailing run of argument-like children.
        arg_index = start = last - 2
        for arg_index in range(start, 0, -1):
            if node[arg_index].kind not in ["expr", "call", "LOAD_CLASSNAME"]:
                break

        if arg_index == start:
            return
        arg_index += 2

    separator = ""
    self.write("(")
    if kwargs:
        # Last arg is tuple of keyword values: omit
        stop = last - 1

        # 3.6+ does this
        while pos_index < arg_index:
            self.write(separator)
            value = self.traverse(node[pos_index])
            self.write("%s" % value)
            separator = ", "
            pos_index += 1

        name_index = 0
        while arg_index < stop:
            self.write(separator)
            value = self.traverse(node[arg_index])
            self.write("%s=%s" % (kwargs[name_index], value))
            separator = ", "
            name_index += 1
            arg_index += 1
    else:
        stop = last
        while arg_index < stop:
            value = self.traverse(node[arg_index])
            arg_index += 1
            self.write(separator, value)
            separator = ", "

    self.write(")")
def kv_map(self, kv_node, sep, line_number, indent):
    """Write the ``key: value`` pairs found in ``kv_node``.

    ``sep`` is written before the first pair; later pairs are preceded
    by ", ", plus a line break and indentation when the line number
    moved while the previous pair was rendered. ``line_number`` and
    ``indent`` are handed to ``indent_if_source_nl`` before the first
    key is written.
    """
    first_time = True
    for kv in kv_node:
        assert kv in ("kv", "kv2", "kv3")

        # kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
        # kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
        # kv3 ::= expr expr STORE_MAP
        #
        # Each layout is (key index, value index, re-read the line number
        # after the key has been written).
        if kv == "kv":
            layout = (-2, 1, False)
        elif kv == "kv2":
            layout = (1, -3, False)
        elif kv == "kv3":
            layout = (-2, 0, True)
        else:
            layout = None

        if layout is not None:
            key_at, value_at, reread_line = layout
            self.write(sep)
            name = self.traverse(kv[key_at], indent="")
            if first_time:
                line_number = self.indent_if_source_nl(line_number, indent)
                first_time = False
            line_number = self.line_number
            self.write(name, ": ")
            if reread_line:
                line_number = self.line_number
            value = self.traverse(
                kv[value_at], indent=self.indent + (len(name) + 2) * " "
            )

        self.write(value)
        sep = ", "
        if line_number != self.line_number:
            sep += "\n" + self.indent + " "
            line_number = self.line_number
def template_engine(self, entry, startnode):
    """The format template interpretation engine. See the comment at the
    beginning of this module for how we interpret format
    specifications such as %c, %C, and so on.

    ``entry`` is a tuple whose first item is the format string and whose
    remaining items are the arguments consumed, in order, by the escapes
    that take one. ``startnode`` is the node the template is applied to.
    """
    fmt = entry[0]
    arg = 1
    pos = 0

    match = escape.search(fmt)
    while match:
        pos = match.end()
        self.write(match.group("prefix"))

        typ = match.group("type") or "{"
        node = startnode
        if match.group("child"):
            # A "[N]" after the '%' selects child N before the escape runs.
            node = node[int(match.group("child"))]

        if typ == "%":
            self.write("%")
        elif typ == "+":
            self.line_number += 1
            self.indent_more()
        elif typ == "-":
            self.line_number += 1
            self.indent_less()
        elif typ == "|":
            self.line_number += 1
            self.write(self.indent)
        # Used mostly on the LHS of an assignment
        # BUILD_TUPLE_n is pretty printed and may take care of other uses.
        elif typ == ",":
            if node.kind in ("unpack", "unpack_w_parens") and node[0].attr == 1:
                self.write(",")
        elif typ == "c":
            index = entry[arg]
            if isinstance(index, tuple):
                # (index, expected name) or (index, collection of names)
                if isinstance(index[1], str):
                    assert (
                        node[index[0]] == index[1]
                    ), "at %s[%d], expected '%s' node; got '%s'" % (
                        node.kind,
                        arg,
                        index[1],
                        node[index[0]].kind,
                    )
                else:
                    assert (
                        node[index[0]] in index[1]
                    ), "at %s[%d], expected to be in '%s' node; got '%s'" % (
                        node.kind,
                        arg,
                        index[1],
                        node[index[0]].kind,
                    )

                index = index[0]
            assert isinstance(
                index, int
            ), "at %s[%d], %s should be int or tuple" % (
                node.kind,
                arg,
                type(index),
            )

            try:
                node[index]
            except IndexError:
                raise RuntimeError(
                    f"\nExpanding '{node.kind}' in template '{entry}[{arg}]':\n"
                    f"{index} is invalid; has only {len(node)} entries\n"
                )
            self.preorder(node[index])

            arg += 1
        elif typ == "p":
            saved_prec = self.prec
            tup = entry[arg]
            assert isinstance(tup, tuple)
            if len(tup) == 3:
                (index, nonterm_name, self.prec) = tup
                if isinstance(tup[1], str):
                    assert (
                        node[index] == nonterm_name
                    ), "at %s[%d], expected '%s' node; got '%s'" % (
                        node.kind,
                        arg,
                        nonterm_name,
                        node[index].kind,
                    )
                else:
                    assert node[tup[0]] in tup[1], (
                        f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' "
                        f"node; got '{node[tup[0]].kind}'"
                    )
            else:
                assert len(tup) == 2
                (index, self.prec) = entry[arg]

            self.preorder(node[index])
            self.prec = saved_prec
            arg += 1
        elif typ == "C":
            low, high, sep = entry[arg]
            children = node[low:high]
            final = len(children) - 1
            for position, subnode in enumerate(children):
                self.preorder(subnode)
                if position < final:
                    self.write(sep)
            arg += 1
        elif typ == "D":
            low, high, sep = entry[arg]
            children = node[low:high]
            final = len(children) - 1
            for position, subnode in enumerate(children):
                # Empty children are skipped together with their separator.
                if len(subnode) > 0:
                    self.preorder(subnode)
                    if position < final:
                        self.write(sep)
            arg += 1
        elif typ == "x":
            # This code is only used in fragments
            assert isinstance(entry[arg], tuple)
            arg += 1
        elif typ == "P":
            saved_prec = self.prec
            low, high, sep, self.prec = entry[arg]
            children = node[low:high]
            final = len(children) - 1
            for position, subnode in enumerate(children):
                self.preorder(subnode)
                if position < final:
                    self.write(sep)
            self.prec = saved_prec
            arg += 1
        elif typ == "{":
            expr = match.group("expr")

            # Line mapping stuff
            if (
                hasattr(node, "linestart")
                and node.linestart
                and hasattr(node, "current_line_number")
            ):
                self.source_linemap[self.current_line_number] = node.linestart

            if expr[0] == "%":
                # %[N]{%X}: apply specifier %X with the next argument.
                index = entry[arg]
                self.template_engine((expr, index), node)
                arg += 1
            else:
                # The expression is evaluated with the node's attribute
                # dictionary as both globals and locals.
                namespace = node.__dict__
                self.write(eval(expr, namespace, namespace))
        match = escape.search(fmt, pos)
    self.write(fmt[pos:])
def default(self, node):
    """Apply the table-driven template for ``node``, if there is one.

    The mapping from ``self._get_mapping`` gives the table followed by
    the child indices leading from ``node`` to the key node. When the key
    node's kind is in the table, its template is run on ``node`` and
    ``self.prune()`` is called.
    """
    mapping = self._get_mapping(node)
    table = mapping[0]

    key = node
    for step in mapping[1:]:
        key = key[step]

    if key.kind not in table:
        return
    self.template_engine(table[key.kind], node)
    self.prune()
def customize(self, customize):
    """
    Special handling for opcodes, such as those that take a variable number
    of arguments -- we add a new entry for each in TABLE_R.

    ``customize`` maps opcode names to a value. Names already in
    ``self.TABLE_R`` are left alone.
    """
    for opname, count in list(customize.items()):
        if opname in self.TABLE_R:
            continue
        # Opcode name without its final "_<suffix>" part.
        op = opname[: opname.rfind("_")]

        if opname.startswith("CALL_METHOD"):
            # This happens in PyPy and Python 3.7+
            self.TABLE_R[opname] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100))
        elif self.version >= (3, 6) and opname.startswith("CALL_FUNCTION_KW"):
            self.TABLE_R[opname] = ("%c(%P)", (0, "expr"), (1, -1, ", ", 100))
        elif op == "CALL_FUNCTION":
            self.TABLE_R[opname] = (
                "%c(%P)",
                (0, "expr"),
                (1, -1, ", ", PRECEDENCE["yield"] - 1),
            )
        elif op in (
            "CALL_FUNCTION_VAR",
            "CALL_FUNCTION_VAR_KW",
            "CALL_FUNCTION_KW",
        ):
            # FIXME: handle everything in customize.
            # Right now, some of this is here, and some in that.

            if count == 0:
                template = "%c(%C"  # '%C' is a dummy here ...
                arg_span = (0, 0, None)  # because of the None in this
            else:
                template = "%c(%C, "
                arg_span = (1, -2, ", ")

            if op == "CALL_FUNCTION_VAR":
                # Python 3.5 only puts optional args (the VAR part)
                # the lowest down the stack
                if self.version == (3, 5):
                    if template == "%c(%C, ":
                        entry = ("%c(*%C, %c)", 0, arg_span, -2)
                    elif template == "%c(%C":
                        entry = ("%c(*%C)", 0, (1, 100, ""))
                elif self.version == (3, 4):
                    # CALL_FUNCTION_VAR's top element of the stack contains
                    # the variable argument list
                    if count == 0:
                        template = "%c(*%c)"
                        entry = (template, 0, -2)
                    else:
                        template = "%c(%C, *%c)"
                        entry = (template, 0, arg_span, -2)
                else:
                    template += "*%c)"
                    entry = (template, 0, arg_span, -2)
            elif op == "CALL_FUNCTION_KW":
                template += "**%c)"
                entry = (template, 0, arg_span, -2)
            elif op == "CALL_FUNCTION_VAR_KW":
                template += "*%c, **%c)"
                # Python 3.5 only puts optional args (the VAR part)
                # the lowest down the stack
                positional = count & 0xFF  # positional parameters
                if self.version == (3, 5) and positional == 0:
                    if arg_span[2]:
                        arg_span = (2, -2, ", ")
                    entry = (template, 0, arg_span, 1, -2)
                else:
                    if arg_span[2]:
                        arg_span = (1, -3, ", ")
                    entry = (template, 0, arg_span, -3, -2)
            else:
                assert False, "Unhandled CALL_FUNCTION %s" % op

            self.TABLE_R[opname] = entry

        # handled by n_dict:
        # if op == 'BUILD_SLICE': self.TABLE_R[k] = ('%C' , (0,-1,':'))
        # handled by n_list:
        # if op == 'BUILD_LIST': self.TABLE_R[k] = ('[%C]' , (0,-1,', '))
        # elif op == 'BUILD_TUPLE': self.TABLE_R[k] = ('(%C%,)', (0,-1,', '))
    return
- # This code is only for Python 1.x - 2.1 ish!
def get_tuple_parameter(self, ast, name):
    """
    If the name of the formal parameter starts with dot,
    it's a tuple parameter, like this:
    #          def MyFunc(xx, (a,b,c), yy):
    #                  print a, b*2, c*42
    In byte-code, the whole tuple is assigned to parameter '.1' and
    then the tuple gets unpacked to 'a', 'b' and 'c'.

    Since identifiers starting with a dot are illegal in Python,
    we can search for the byte-code equivalent to '(a,b,c) = .1'

    ``ast`` must be a "stmts" node. The matching assignment statement is
    removed from ``ast`` and its left-hand side is returned as text,
    wrapped in parentheses if it is not already. When no such assignment
    exists, the parenthesized ``name`` is returned.
    """

    assert ast == "stmts"
    for i, stmt in enumerate(ast):
        # search for an assign-statement
        node = stmt[0] if stmt == "sstmt" else stmt
        if node == "assign" and node[0] == ASSIGN_TUPLE_PARAM(name):
            # okay, this assigns '.n' to something.  Deleting while
            # enumerating is safe because we return right afterwards.
            del ast[i]
            # walk lhs; this
            # returns a tuple of identifiers as used
            # within the function definition
            assert node[1] == "store"
            # if lhs is not a UNPACK_TUPLE (or equiv.),
            # add parentheses to make this a tuple
            result = self.traverse(node[1])
            if not (result.startswith("(") and result.endswith(")")):
                result = "(%s)" % result
            return result
    # Fallback: keep the parentheses balanced, as in the branch above.
    return f"({name})"
def build_class(self, code):
    """Dump class definition, doc string and class body.

    ``code`` is the code object for the class body.  It is wrapped in a
    ``Code`` object, parsed with ``build_ast``, and the resulting tree
    is passed to ``gen_source``.  When ``self.hide_internal`` is set,
    several leading statements (store-locals, the module-name
    assignment, the ``__qualname__`` assignment), the docstring node
    and a trailing "return locals" node are removed from the tree
    before source is generated.
    """
    assert iscode(code)
    self.classes.append(self.currentclass)
    code = Code(code, self.scanner, self.currentclass)

    indent = self.indent
    ast = self.build_ast(code._tokens, code._customize, code)

    # save memory by deleting no-longer-used structures
    code._tokens = None

    # Look through an "sstmt" wrapper around the first statement.
    if ast[0] == "sstmt":
        ast[0] = ast[0][0]
    first_stmt = ast[0]

    if first_stmt == "docstring":
        self.println(self.traverse(first_stmt))
        del ast[0]
        first_stmt = ast[0]

    if (3, 0) <= self.version <= (3, 3):
        try:
            if first_stmt == "store_locals" and self.hide_internal:
                del ast[0]
                if ast[0] == "sstmt":
                    ast[0] = ast[0][0]
                first_stmt = ast[0]
        except Exception:
            pass

    try:
        if first_stmt == NAME_MODULE and self.hide_internal:
            del ast[0]
            first_stmt = ast[0]
    except Exception:
        pass

    if len(ast):
        if ast[0] == "sstmt":
            ast[0] = ast[0][0]
        first_stmt = ast[0]

    have_qualname = False
    if self.version < (3, 0):
        # Should we ditch this in favor of the "else" case?
        qualname = ".".join(self.classes)
        load_qualname = SyntaxTree("expr", [Token("LOAD_CONST", pattr=qualname)])
        store_qualname = SyntaxTree(
            "store", [Token("STORE_NAME", pattr="__qualname__")]
        )
        qual_name_tree = SyntaxTree("assign", [load_qualname, store_qualname])
        # FIXME: is this right now that we've redone the grammar?
        have_qualname = ast[0] == qual_name_tree
    else:
        # Python 3.4+ has constants like 'cmp_to_key.<locals>.K'
        # which are not simple classes like the < 3 case.
        try:
            if (
                first_stmt == "assign"
                and first_stmt[0][0] == "LOAD_STR"
                and first_stmt[1] == "store"
                and first_stmt[1][0] == Token("STORE_NAME", pattr="__qualname__")
            ):
                have_qualname = True
        except Exception:
            pass

    if have_qualname and self.hide_internal:
        del ast[0]

    # if docstring exists, dump it
    consts = code.co_consts
    if consts and consts[0] is not None and len(ast) > 0:
        # Index of the docstring statement: slot 0, else slot 1, else none.
        doc_index = None
        if is_docstring(ast[0], self.version, consts):
            doc_index = 0
        elif len(ast) > 1 and is_docstring(ast[1], self.version, consts):
            doc_index = 1

        if doc_index is not None and self.hide_internal:
            try:
                # FIXME: Is there an extra [0]?
                docstring = ast[doc_index][0][0][0][0].pattr
            except Exception:
                docstring = consts[0]
            if print_docstring(self, indent, docstring):
                self.println()
                del ast[doc_index]

    # The function defining a class returns locals() in Python somewhere less than
    # 3.7.
    #
    # We don't want this to show up in the source, so remove the node.
    if len(ast):
        if ast == "stmts" and ast[-1] == "sstmt":
            return_locals_parent, parent_index = ast[-1], 0
        else:
            return_locals_parent, parent_index = ast, -1
        return_locals = return_locals_parent[parent_index]
        if return_locals == RETURN_LOCALS and self.hide_internal:
            del return_locals_parent[parent_index]

    global_names, nonlocal_names = find_globals_and_nonlocals(
        ast, set(), set(), code, self.version
    )

    # Add "global" and "nonlocal" declaration statements at the top
    for global_name in sorted(global_names):
        self.println(indent, "global ", global_name)
    for nonlocal_name in sorted(nonlocal_names):
        self.println(indent, "nonlocal ", nonlocal_name)

    # gen_source() is bracketed so that self.name is what it was before.
    saved_name = self.name
    self.gen_source(ast, code.co_name, code._customize)
    self.name = saved_name

    # save memory by deleting no-longer-used structures
    code._tokens = None
    code._customize = None

    self.classes.pop(-1)
def gen_source(
    self,
    ast,
    name,
    customize,
    is_lambda=False,
    returnNone=False,
    debug_opts=DEFAULT_DEBUG_OPTS,
):
    """Convert parse tree ``ast`` to Python source code and write it.

    ``self.name`` and ``self.return_none`` are set to ``name`` and
    ``returnNone`` while the tree is processed and put back afterwards;
    ``self.debug_opts`` is set to ``debug_opts`` and left that way.
    For a non-empty tree the generated text is also kept in
    ``self.text``.
    """
    saved_return_none = self.return_none
    self.return_none = returnNone
    saved_name = self.name
    self.name = name
    self.debug_opts = debug_opts

    if len(ast):
        self.customize(customize)
        self.text = self.traverse(ast, is_lambda=is_lambda)
        # In a formatted string using "lambda", we should not add "\n".
        # For example in:
        #   f'{(lambda x:x)("8")!r}'
        # Adding a "\n" after "lambda x: x" will give an error message:
        #   SyntaxError: f-string expression part cannot include a backslash
        # So avoid \n after writing text
        self.write(self.text)
    else:
        # if code would be empty, append 'pass'
        self.println(self.indent, "pass")

    self.name = saved_name
    self.return_none = saved_return_none
def build_ast(
    self,
    tokens,
    customize,
    code,
    is_lambda=False,
    noneInNames=False,
    is_top_level_module=False,
    compile_mode="exec",
) -> GenericASTTraversal:
    """Parse ``tokens`` and return the transformed parse tree.

    ``tokens`` is modified in place: in lambda mode the return tokens
    are renamed and a "LAMBDA_MARKER" token is appended; otherwise,
    when ``self.hide_internal`` is set, a trailing
    ``LOAD_CONST None; RETURN_VALUE`` pair is removed or a
    "RETURN_LAST" token is appended.

    ``customize`` and ``code`` are passed on to ``parse``; ``code`` is
    also passed to the tree transformer.  ``noneInNames`` disables the
    trailing-return massaging.  ``is_top_level_module`` and
    ``compile_mode`` are accepted but not used in this method.

    Returns the tree produced by ``self.treeTransform.transform``, or
    ``PASS`` when no tokens are left after massaging.

    Raises ``ParserError`` when parsing fails with a ``ParserError`` or
    an ``AssertionError``.
    """
    # FIXME: DRY with fragments.py

    # assert isinstance(tokens[0], Token)

    if is_lambda:
        for t in tokens:
            if t.kind == "RETURN_END_IF":
                t.kind = "RETURN_END_IF_LAMBDA"
            elif t.kind == "RETURN_VALUE":
                t.kind = "RETURN_VALUE_LAMBDA"
        tokens.append(Token("LAMBDA_MARKER"))
        try:
            # FIXME: have p.insts update in a better way
            # modularity is broken here
            p_insts = self.p.insts
            self.p.insts = self.scanner.insts
            self.p.offset2inst_index = self.scanner.offset2inst_index
            ast = parse(self.p, tokens, customize, code)
            self.customize(customize)
            self.p.insts = p_insts
        except (ParserError, AssertionError) as e:
            raise ParserError(e, tokens, self.p.debug["reduce"])
        transform_tree = self.treeTransform.transform(ast, code)
        # Show the tree *after* transformation, as the non-lambda path does.
        self.maybe_show_tree(transform_tree, phase="after")
        del ast  # Save memory
        return transform_tree

    # The bytecode for the end of the main routine has a "return
    # None". However, you can't issue a "return" statement in
    # main. So as the old cigarette slogan goes: I'd rather switch
    # (the token stream) than fight (with the grammar to not emit
    # "return None").
    if self.hide_internal:
        if len(tokens) >= 2 and not noneInNames:
            if tokens[-1].kind in ("RETURN_VALUE", "RETURN_VALUE_LAMBDA"):
                # Python 3.4's classes can add a "return None" which is
                # invalid syntax.
                load_const = tokens[-2]
                # We should have:
                #   LOAD_CONST None
                # with *no* line number associated with the token.
                # A line number on the token, or a non-None token
                # value, indicates a token based on user source text.
                if (
                    load_const.kind == "LOAD_CONST"
                    and load_const.linestart is None
                    and load_const.attr is None
                ):
                    # Delete LOAD_CONST (None) RETURN_VALUE
                    del tokens[-2:]
                else:
                    tokens.append(Token("RETURN_LAST"))
        if len(tokens) == 0:
            return PASS

    # Build a parse tree from a tokenized and massaged disassembly.
    try:
        # FIXME: have p.insts update in a better way
        # Modularity is broken here.
        p_insts = self.p.insts
        self.p.insts = self.scanner.insts
        self.p.offset2inst_index = self.scanner.offset2inst_index
        self.p.opc = self.scanner.opc
        ast = parse(self.p, tokens, customize, code)
        self.p.insts = p_insts
    except (ParserError, AssertionError) as e:
        raise ParserError(e, tokens, self.p.debug["reduce"])

    checker(ast, False, self.ast_errors)

    self.customize(customize)
    transform_tree = self.treeTransform.transform(ast, code)

    self.maybe_show_tree(transform_tree, phase="after")

    del ast  # Save memory
    return transform_tree
def _get_mapping(self, node):
    """Return the ``self.MAP`` entry for ``node``, or ``self.MAP_DIRECT``
    when ``node`` has no entry."""
    mapping = self.MAP.get(node, self.MAP_DIRECT)
    return mapping
def code_deparse(
    co,
    out=sys.stdout,
    version: Optional[tuple] = None,
    debug_opts=DEFAULT_DEBUG_OPTS,
    code_objects=None,
    compile_mode="exec",
    is_pypy=IS_PYPY,
    walker=SourceWalker,
    start_offset: int = 0,
    stop_offset: int = -1,
) -> Optional[SourceWalker]:
    """
    ingests and deparses a given code block 'co'. If version is None,
    we will use the current Python interpreter version.

    ``out`` is the stream handed to the walker; ``None`` means
    ``sys.stdout``.  ``debug_opts`` is a dict whose optional keys
    "asm", "tree" and "grammar" are consulted here.  ``code_objects``
    is passed on to the scanner; a fresh dict is used when it is
    ``None``.  When ``start_offset`` is positive, tokens before the
    first integer offset at or past it are dropped; when
    ``stop_offset`` is greater than -1, tokens from the first offset at
    or past it onward are dropped.

    Returns the walker instance after source generation, or ``None``
    when no tree could be built.  Raises ``SourceWalkerError`` when
    grammar-rule errors were recorded or the walker flagged an error.
    """
    assert iscode(co)

    if out is None:
        out = sys.stdout
    if version is None:
        version = PYTHON_VERSION_TRIPLE
    if code_objects is None:
        # A ``{}`` default would be one dict shared by every call.
        code_objects = {}

    # "asm" is looked up like "tree" and "grammar" below, so a partial
    # debug dictionary does not raise KeyError.
    show_asm = debug_opts.get("asm", None)

    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy, show_asm=show_asm)

    tokens, customize = scanner.ingest(
        co, code_objects=code_objects, show_asm=show_asm
    )

    if start_offset > 0:
        for i, t in enumerate(tokens):
            # If t.offset is a string, we want to skip this.
            if isinstance(t.offset, int) and t.offset >= start_offset:
                tokens = tokens[i:]
                break

    if stop_offset > -1:
        for i, t in enumerate(tokens):
            # In contrast to the test for start_offset, if t.offset is
            # a string, we want to extract the integer offset value.
            if t.off2int() >= stop_offset:
                tokens = tokens[:i]
                break

    debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG))

    #  Build Syntax Tree from disassembly.
    linestarts = dict(scanner.opc.findlinestarts(co))
    deparsed = walker(
        version,
        out,
        scanner,
        showast=debug_opts.get("tree", TREE_DEFAULT_DEBUG),
        debug_parser=debug_parser,
        compile_mode=compile_mode,
        is_pypy=is_pypy,
        linestarts=linestarts,
    )

    is_top_level_module = co.co_name == "<module>"
    is_lambda = is_lambda_mode(compile_mode)

    if compile_mode == "eval":
        deparsed.hide_internal = False
    deparsed.compile_mode = compile_mode
    deparsed.ast = deparsed.build_ast(
        tokens,
        customize,
        co,
        is_lambda=is_lambda,
        is_top_level_module=is_top_level_module,
        compile_mode=compile_mode,
    )

    # XXX workaround for profiling
    if deparsed.ast is None:
        return None

    # Grammar start symbol expected for each compile mode.  Modes that
    # are not listed ("single" included) are not checked.
    if is_lambda:
        expected_start = "lambda_start"
    else:
        expected_start = {
            "eval": "expr_start",
            "expr": "expr_start",
            "exec": "stmts",
        }.get(compile_mode)

    if expected_start:
        assert deparsed.ast == expected_start, (
            f"Should have parsed grammar start to '{expected_start}'; "
            f"got: {deparsed.ast.kind}"
        )

    # save memory
    del tokens

    deparsed.mod_globs, nonlocals = find_globals_and_nonlocals(
        deparsed.ast, set(), set(), co, version
    )

    assert not nonlocals

    # convert leading '__doc__ = "..." into doc string
    # This clean-up is best effort: any failure leaves the tree as it is.
    try:
        stmts = deparsed.ast
        first_stmt = stmts[0]
        if version >= (3, 6):
            if first_stmt[0] == "SETUP_ANNOTATIONS":
                del stmts[0]
                assert stmts[0] == "sstmt"
                # Nuke sstmt
                first_stmt = stmts[0][0]
        if first_stmt == "docstring":
            print_docstring(deparsed, "", co.co_consts[0])
            del stmts[0]
        if stmts[-1] == RETURN_NONE:
            stmts.pop()  # remove last node
            # todo: if empty, add 'pass'
    except Exception:
        pass

    deparsed.FUTURE_UNICODE_LITERALS = (
        COMPILER_FLAG_BIT["FUTURE_UNICODE_LITERALS"] & co.co_flags != 0
    )

    # What we've been waiting for: Generate source from Syntax Tree!
    deparsed.gen_source(
        deparsed.ast,
        name=co.co_name,
        customize=customize,
        is_lambda=is_lambda,
        debug_opts=debug_opts,
    )

    for g in sorted(deparsed.mod_globs):
        deparsed.write("# global %s ## Warning: Unused global\n" % g)

    if deparsed.ast_errors:
        deparsed.write("# NOTE: have internal decompilation grammar errors.\n")
        deparsed.write("# Use -T option to show full context.")
        for err in deparsed.ast_errors:
            deparsed.write(err)
        raise SourceWalkerError("Deparsing hit an internal grammar-rule bug")

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def deparse_code2str(
    code,
    out=sys.stdout,
    version=None,
    debug_opts=DEFAULT_DEBUG_OPTS,
    code_objects=None,
    compile_mode="exec",
    is_pypy=IS_PYPY,
    walker=SourceWalker,
    start_offset: int = 0,
    stop_offset: int = -1,
) -> str:
    """
    Return the deparsed text for a Python code object. `out` is where
    any intermediate output for assembly or tree output will be sent.

    All arguments are handed to ``code_deparse``, including
    ``start_offset`` and ``stop_offset``.  A fresh dict is used for
    ``code_objects`` when it is ``None``.
    """
    if code_objects is None:
        # A ``{}`` default would be one dict shared by every call.
        code_objects = {}
    return code_deparse(
        code,
        out,
        version,
        debug_opts,
        code_objects=code_objects,
        compile_mode=compile_mode,
        is_pypy=is_pypy,
        walker=walker,
        # These were accepted but silently dropped before.
        start_offset=start_offset,
        stop_offset=stop_offset,
    ).text
if __name__ == "__main__":
    # Self-test: decompile the helper below from its own code object and
    # print the result.  The helper's statements are kept exactly as they
    # were, since its bytecode is the input being decompiled.

    def deparse_test(co):
        """This is a docstring"""
        s = deparse_code2str(co)
        # s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}})
        print(s)
        return

    deparse_test(deparse_test.__code__)
|