| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733 |
- # Copyright (c) 2022-2024 by Rocky Bernstein
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- """
- Generators and comprehension functions
- """
- from typing import Optional
- from spark_parser.ast import GenericASTTraversalPruningException
- from xdis import iscode
- from decompyle3.parsers.main import get_python_parser
- from decompyle3.scanner import Code
- from decompyle3.scanners.tok import Token
- from decompyle3.semantics.consts import PRECEDENCE
- from decompyle3.semantics.helper import is_lambda_mode
- class ComprehensionMixin:
- """
- These functions hand nonterminal common actions that occur
- when encountering a generator or some sort of comprehension.
- What is common about these is that often the nonterminal has
- a code object whose decompilation needs to be melded into the resulting
- Python source code. In source code, the implicit function calls
- are not seen.
- """
- def closure_walk(self, node, collection_index: int):
- """Dictionary and Set comprehensions using closures."""
- p: int = self.prec
- code_index = 0 if node[0] == "load_genexpr" else 1
- tree = self.get_comprehension_function(node, code_index=code_index)
- if tree.kind in ("stmts", "lambda_start"):
- tree = tree[0]
- # Remove single reductions as in ("stmts", "sstmt"):
- # FIXME: when parse_tree -> ast_transform is done we should
- # not need this.
- while len(tree) == 1 or tree.kind in ("return_expr_lambda",):
- tree = tree[0]
- if tree == "genexpr_func_async":
- store = tree[2]
- iter_index = 3
- collection_index = 3
- elif tree in ("genexpr_func", "dict_comp_func", "set_comp_func"):
- store = tree[3]
- iter_index = 4
- elif tree == "set_comp":
- tree = tree[1][0]
- assert tree == "set_for", tree.kind
- store = tree[2]
- iter_index = 3
- else:
- store = tree[4]
- iter_index = 5
- if node[collection_index] == "get_iter":
- expr = node[collection_index][0]
- assert expr == "expr", expr.kind
- collection = expr
- else:
- collection = node[collection_index]
- n = tree[iter_index]
- if_condition = None
- write_if = False
- assert n in ("comp_iter", "set_iter")
- # Find inner-most body node.
- while n == "comp_iter":
- n = n[0] # recurse one step
- if n in ("list_for", "comp_for"):
- store = n[2]
- n = n[3]
- elif n[0].kind == "c_compare":
- if_condition = n
- n = n[-1]
- elif n in (
- "comp_if",
- "comp_if_not",
- "list_if",
- "list_if_and_or",
- "list_if_not",
- "list_if_or_not",
- ):
- # FIXME: most of the grammar start with expr_...
- # Some of the older ones can be: expr <jump> <iter>
- # This may disappear though.
- if n[0].kind == "expr":
- if_condition = n
- n = n[-1]
- elif n[0].kind in ("expr_pjif", "expr_pjiff"):
- if_condition = n
- n = n[-1]
- assert n == "comp_iter"
- elif n[0].kind in ("or_jump_if_false_cf", "or_jump_if_false_loop_cf"):
- if_condition = n[1]
- n = n[1]
- else:
- if len(n) == 2:
- if_condition = n[0]
- n = n[1]
- else:
- if_condition = n[1]
- n = n[2]
- pass
- pass
- assert n in ("comp_body", "set_iter"), n.kind
- self.preorder(n[0])
- if node == "generator_exp_async":
- self.write(" async")
- self.write(" for ")
- self.preorder(store)
- self.write(" in ")
- self.preorder(collection)
- if if_condition:
- if write_if:
- self.write(" if ")
- self.preorder(if_condition)
- self.prec = p
- def comprehension_walk(
- self,
- node,
- iter_index: Optional[int],
- ):
- p: int = self.prec
- self.prec = PRECEDENCE["lambda_body"] - 1
- # FIXME: clean this up
- if node == "dict_comp":
- cn = node[1]
- elif node in ("generator_exp", "generator_exp_async"):
- if node[0] == "load_genexpr":
- load_genexpr = node[0]
- elif node[1] == "load_genexpr":
- load_genexpr = node[1]
- cn = load_genexpr[0]
- else:
- if len(node[1]) > 1 and hasattr(node[1][1], "attr"):
- # Python 3.3+ does this
- cn = node[1][1]
- else:
- assert False, "Can't find code for comprehension"
- assert iscode(cn.attr)
- code = Code(cn.attr, self.scanner, self.currentclass, self.debug_opts["asm"])
- # FIXME: is there a way we can avoid this?
- # The problem is that in filter in top-level list comprehensions we can
- # encounter comprehensions of other kinds, and lambdas
- if is_lambda_mode(self.compile_mode):
- p_save = self.p
- self.p = get_python_parser(
- self.version,
- compile_mode="exec",
- is_pypy=self.is_pypy,
- )
- tree = self.build_ast(code._tokens, code._customize, code)
- self.p = p_save
- else:
- tree = self.build_ast(code._tokens, code._customize, code)
- self.customize(code._customize)
- # Remove single reductions as in ("stmts", "sstmt"):
- while len(tree) == 1:
- tree = tree[0]
- if tree == "stmts":
- # FIXME: rest is a return None?
- # Verify this
- # rest = tree[1:]
- tree = tree[0]
- elif tree == "lambda_start":
- assert len(tree) <= 3
- tree = tree[-2]
- if tree == "return_expr_lambda":
- tree = tree[1]
- pass
- if tree in ("genexpr_func_async", "genexpr_func"):
- for n in tree:
- if n.kind == "comp_iter":
- break
- pass
- else:
- n = tree[iter_index]
- assert n == "comp_iter", n
- if_condition = None
- write_if = False
- # Find the comprehension body. It is the inner-most
- # node that is not list_.. .
- while n == "comp_iter": # list_iter
- n = n[0] # recurse one step
- if n == "comp_for":
- if n[0] == "SETUP_LOOP":
- n = n[4]
- else:
- n = n[3]
- elif n == "comp_if":
- n = n[1]
- elif n in (
- "comp_if_not",
- "comp_if_not_and",
- "comp_if_not_or",
- "comp_if_or",
- ):
- if_condition = n
- write_if = True
- n = n[-1]
- assert n == "comp_iter"
- assert n == "comp_body", n.kind
- self.preorder(n[0])
- if node == "generator_exp_async":
- self.write(" async")
- iter_var_index = 2
- else:
- iter_var_index = iter_index
- if tree[iter_var_index] != "store":
- iter_var_index = iter_index - 1
- self.write(" for ")
- self.preorder(tree[iter_var_index])
- self.write(" in ")
- if node[2] == "expr":
- iter_expr = node[2]
- elif node[3] == "get_aiter":
- iter_expr = node[3]
- else:
- iter_expr = node[-3]
- assert iter_expr in ("expr", "get_aiter"), iter_expr
- self.preorder(iter_expr)
- if if_condition and not tree[iter_index][0].kind.startswith("comp_if"):
- if write_if:
- self.write(" if ")
- self.preorder(if_condition)
- self.prec = p
- def comprehension_walk_newer(
- self,
- node,
- iter_index: Optional[int],
- code_index: int = -5,
- collection_node=None,
- ):
- """Non-closure-based comprehensions.
- Note: there are also other comprehensions.
- """
- # FIXME: DRY with listcomp_closure3
- p = self.prec
- self.prec = PRECEDENCE["lambda_body"] - 1
- # FIXME? Nonterminals in grammar maybe should be split out better?
- # Maybe test on self.compile_mode?
- if (
- isinstance(node[0], Token)
- and node[0].kind.startswith("LOAD")
- and iscode(node[0].attr)
- ):
- if node[3] == "get_aiter":
- compile_mode = self.compile_mode
- self.compile_mode = "genexpr"
- is_lambda = self.is_lambda
- self.is_lambda = True
- try:
- tree = self.get_comprehension_function(node, code_index)
- except GenericASTTraversalPruningException:
- pass
- self.compile_mode = compile_mode
- self.is_lambda = is_lambda
- else:
- tree = self.get_comprehension_function(node, code_index)
- elif (
- len(node) > 2
- and isinstance(node[2], Token)
- and node[2].kind.startswith("LOAD")
- and iscode(node[2].attr)
- ):
- tree = self.get_comprehension_function(node, 2)
- else:
- tree = node
- # Pick out important parts of the comprehension:
- # * the variable we iterate over: "store"
- # * the results we accumulate: "n"
- store = None
- if tree.kind == "genexpr_func_async":
- genexpr_func_async = tree
- elif tree.kind == "set_iter":
- # Not sure if this is correct
- node = tree = tree[0]
- elif tree.kind != "genexpr_func":
- # Not sure if this is still correct
- genexpr_func_async = tree[1]
- collection_node_index = None
- if node == "list_comp_async":
- # We have two different kinds of grammar rules:
- # list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 expr ...
- # and:
- # list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2
- if tree[0] == "expr" and tree[0][0] == "list_comp_async":
- tree = tree[0][0]
- # PyPy 3.8 has LOAD_ARG
- if tree[0] in ("BUILD_LIST_0", "LOAD_ARG"):
- list_afor2 = tree[2]
- assert list_afor2 == "list_afor2"
- store = list_afor2[1]
- assert store == "store"
- n = list_afor2[3] if list_afor2[3] == "list_iter" else list_afor2[2]
- collection_node_index = 1
- else:
- # ???
- pass
- elif node.kind in ("dict_comp_async", "set_comp_async"):
- # We have two different kinds of grammar rules:
- # dict_comp_async ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr ...
- # set_comp_async ::= LOAD_SETCOMP LOAD_STR MAKE_FUNCTION_0 expr ...
- # and:
- # dict_comp_async ::= BUILD_MAP_0 LOAD_ARG genexpr_func_async
- # set_comp_async ::= BUILD_SET_0 LOAG_ARG genexpr_func_async
- if tree[0] == "expr":
- tree = tree[0]
- if tree[0].kind in ("BUILD_MAP_0", "BUILD_SET_0"):
- genexpr_func_async = tree[1]
- if genexpr_func_async == "genexpr_func_async":
- store = genexpr_func_async[2]
- assert store.kind.startswith("store")
- n = genexpr_func_async[3]
- else:
- set_afor2 = genexpr_func_async
- assert set_afor2 == "set_afor2"
- n = set_afor2[1]
- store = n[1]
- collection_node = node[3]
- else:
- # ???
- pass
- elif node == "list_afor":
- collection_node = node[0]
- list_afor2 = node[1]
- assert list_afor2 == "list_afor2"
- store = list_afor2[1]
- assert store == "store"
- n = list_afor2[2]
- elif node == "set_afor2":
- collection_node = node[0]
- set_iter_async = node[1]
- assert set_iter_async == "set_iter_async"
- store = set_iter_async[1]
- assert store == "store"
- n = set_iter_async[2]
- elif node == "list_comp" and tree[0] == "expr":
- tree = tree[0][0]
- n = tree[iter_index]
- elif node == "set_comp" and tree[1] == "set_iter":
- n = tree[1]
- else:
- for k in tree:
- if k.kind in ("comp_iter", "list_iter", "set_iter", "lc_body"):
- n = k
- break
- pass
- else:
- n = tree[iter_index]
- if tree in (
- "dict_comp_func",
- "genexpr_func",
- "genexpr_func_async",
- "generator_exp",
- "list_comp",
- "set_comp",
- "set_comp_func",
- "set_comp_func_header",
- ):
- # Find location of store
- for k in tree:
- if k.kind in ("comp_iter", "list_iter", "set_iter", "await_expr"):
- n = k
- elif k == "store":
- store = k
- break
- pass
- pass
- elif tree.kind in ("list_comp_async", "dict_comp_async", "set_afor2"):
- # Handled this condition above.
- pass
- else:
- # FIXME: we get this when we parse lambda's explicitly.
- # And here we've already printed/handled the list comprehension
- # this iteration is duplicate in seeing the list-comprehension code
- # item again. Is this a larger duplicate parsing problem?
- # Not sure what the best this thing to do is.
- # try:
- # n
- # except:
- # from trepan.api import debug; debug()
- if n.kind in ("RETURN_VALUE_LAMBDA", "return_expr_lambda"):
- self.prune()
- assert n.kind in ("list_iter", "comp_iter", "set_iter", "set_iter_async"), n
- # FIXME: I'm not totally sure this is right.
- # Find the list comprehension body. It is the inner-most
- # node that is not list_.. .
- if_nodes = []
- if_node_parent = None
- comp_for = None
- comp_store = None
- if n == "comp_iter":
- comp_for = n
- if not store:
- comp_store = tree[3]
- # Iterate to find the inner-most comprehension body.
- # We'll come back to the list iteration below.
- while n in (
- "comp_iter",
- "list_afor",
- "list_afor2",
- "list_iter",
- "set_afor",
- "set_afor2",
- "set_iter",
- "set_iter_async",
- ):
- # iterate one nesting deeper
- if n in ("list_afor", "set_afor"):
- n = n[1]
- elif n in ("list_afor2", "set_afor2", "set_iter_async"):
- if n[1] == "store":
- store = n[1]
- n = n[3] if n[3] in ("list_iter", "set_iter") else n[2]
- else:
- n = n[0]
- if n in ("comp_for", "list_for", "set_for"):
- collection_node = n
- if n[2] == "store" and not store:
- store = n[2]
- if not comp_store:
- comp_store = store
- n = n[3]
- assert n.kind in ("comp_iter", "list_iter", "set_iter")
- elif n in ("list_if_chained",):
- # list_if_chained ::= list_if_compare ... list_iter
- if_nodes.append(n[0])
- assert n[0] == "list_if_compare"
- n = n[-1]
- assert n == "list_iter"
- elif n in (
- "comp_if_not_and",
- "comp_if_or",
- "comp_if_or2",
- "comp_if_or_not",
- "comp_if_not_or",
- ):
- if_nodes.append(n[0])
- n = n[-1]
- assert n == "comp_iter"
- elif n in (
- "list_if",
- "list_if_not",
- "list_if37",
- "list_if37_not",
- "comp_if",
- "comp_if_not",
- ):
- if n in ("list_if37", "list_if37_not", "comp_if"):
- if n in ("comp_if", "list_if37", "list_if"):
- if_nodes.append(n[0])
- n = n[1]
- else:
- if n in ("comp_if_not",):
- if_nodes.append(n)
- else:
- if_node_parent = n
- if_nodes.append(n[0])
- if n[1] == "store":
- store = n[1]
- n = n[-2] if n[-1] == "come_from_opt" else n[-1]
- pass
- elif n.kind == "list_if_and_or":
- if_nodes.append(n[-1][0])
- n = n[-1]
- assert n == "list_iter"
- pass
- # Python 2.7+ starts including set_comp_body
- # Python 3.5+ starts including set_comp_func
- assert store, "Couldn't find store in list/set comprehension"
- # A problem created with later Python code generation is that there
- # is a lambda set up with a dummy argument name that is then called
- # So we can't just translate that as is but need to replace the
- # dummy name. Below we are picking out the variable name as seen
- # in the code. And trying to generate code for the other parts
- # that don't have the dummy argument name in it.
- # Another approach might be to be able to pass in the source name
- # for the dummy argument.
- if node not in ("list_afor", "set_afor"):
- # FIXME decompile_cfg doesn't have to do this. Find out why.
- self.preorder(n if n.kind in ("await_expr", "LOAD_ARG") else n[0])
- if node.kind in (
- "dict_comp_async",
- "genexpr_func_async",
- "list_afor",
- "list_comp_async",
- "set_afor2",
- "set_comp_async",
- ):
- self.write(" async")
- # For listcomp, setcomp, etc., the collection is .0 and that's the best we
- # can do. So don't try to find the collection node.
- if not self.compile_mode.endswith("comp"):
- collection_node_index = None
- if collection_node_index is None:
- for i, child in enumerate(node):
- if child.kind in (
- "expr",
- "expr_get_aiter",
- "get_aiter",
- "get_iter",
- ):
- collection_node_index = i
- break
- assert collection_node_index is not None
- elif len(node) >= 3 and node[3] == "expr":
- collection_node_index = 3
- collection_node = node[3]
- assert collection_node == "expr"
- elif node == "comp_body":
- collection_node = node
- elif node.kind == "genexpr_func":
- collection_node_index = 0
- elif collection_node is None:
- collection_node_index = -3
- self.write(" for ")
- if comp_store:
- self.preorder(comp_store)
- # We have already added the comp_store contribution,
- # don't attempt to decompile it again
- comp_store = None
- else:
- self.preorder(store)
- self.write(" in ")
- if node == "list_afor":
- list_afor2 = node[1]
- assert list_afor2 == "list_afor2"
- list_iter = list_afor2[2]
- assert list_iter == "list_iter"
- self.preorder(collection_node)
- elif node == "set_comp_async":
- self.preorder(collection_node)
- elif node == "list_comp_async":
- self.preorder(node[collection_node_index])
- elif is_lambda_mode(self.compile_mode) and collection_node_index is None:
- if node in ("list_comp_async",):
- self.preorder(node[1])
- elif collection_node is None:
- assert node[3] in ("get_aiter", "get_iter"), node[3].kind
- self.preorder(node[3])
- else:
- self.preorder(collection_node)
- else:
- if not collection_node:
- if node[3] == "set_iter":
- self.preorder(tree[0])
- collection_node = node[collection_node_index]
- self.preorder(collection_node)
- # Here is where we handle nested list iterations which
- # includes their corresponding "if" conditions.
- if tree in ("list_comp", "set_comp") and not self.is_pypy:
- list_iter = tree[1]
- assert list_iter in ("list_iter", "set_iter")
- list_for = list_iter[0]
- if list_for in ("list_for", "set_for"):
- # In the grammar we have:
- # list_for ::= _ for_iter store list_iter ...
- # or
- # set_for ::= _ set_iter store set_iter ...
- list_iter_inner = list_for[3]
- assert list_iter_inner in ("list_iter", "set_iter")
- # If we have set_comp_body, we've done this above.
- if not (
- list_iter_inner == "set_iter"
- and list_iter_inner[0] == "set_comp_body"
- ):
- self.preorder(list_iter_inner)
- if if_node_parent == list_iter_inner[0]:
- self.prec = p
- return
- comp_store = None
- pass
- elif tree == "set_comp_func":
- # Handle nested comp_for iterations.
- comp_iter = tree[4]
- assert comp_iter in ("comp_iter", "await_expr")
- while comp_iter == "comp_iter":
- comp_for = comp_iter[0]
- if comp_for != "comp_for":
- break
- self.preorder(comp_iter)
- if len(comp_for) < 4:
- break
- comp_iter = comp_for[3]
- assert comp_store is None
- if comp_store:
- self.preorder(comp_store)
- for if_node in if_nodes:
- if if_node != "comp_if_or":
- self.write(" if ")
- if if_node in (
- "c_compare_chained37_false",
- "comp_if_not_and",
- "comp_if_not_or",
- "comp_if_or",
- "comp_if_or2",
- "comp_if_or_not",
- ):
- self.preorder(if_node)
- else:
- # FIXME: go over these to add more of this in the template,
- # not here.
- if if_node in (
- "comp_if_not",
- "list_if37_not",
- "list_if_not",
- "list_if_or_not",
- ):
- self.write("not ")
- pass
- self.prec = PRECEDENCE["lambda_body"] - 1
- self.preorder(if_node[0])
- pass
- self.prec = p
- def get_comprehension_function(self, node, code_index: int):
- """
- Build the body of a comprehension function and then
- find the comprehension node buried in the tree which may
- be surrounded with start-like symbols or dominiators,.
- """
- self.prec = PRECEDENCE["lambda_body"] - 1
- code_node = node[code_index]
- if code_node == "load_genexpr":
- code_node = code_node[0]
- code_obj = code_node.attr
- assert iscode(code_obj), code_node
- code = Code(code_obj, self.scanner, self.currentclass, self.debug_opts["asm"])
- # FIXME: is there a way we can avoid this?
- # The problem is that in filter in top-level list comprehensions we can
- # encounter comprehensions of other kinds, and lambdas
- if is_lambda_mode(self.compile_mode):
- p_save = self.p
- self.p = get_python_parser(
- self.version,
- compile_mode="exec",
- is_pypy=self.is_pypy,
- )
- tree = self.build_ast(
- code._tokens, code._customize, code, is_lambda=self.is_lambda
- )
- self.p = p_save
- else:
- tree = self.build_ast(
- code._tokens, code._customize, code, is_lambda=self.is_lambda
- )
- self.customize(code._customize)
- # skip over: sstmt, stmt, return, return_expr
- # and other singleton derivations
- if tree == "lambda_start":
- tree = tree[0]
- while len(tree) == 1 or (
- tree in ("stmt", "sstmt", "return", "return_expr", "return_expr_lambda")
- ):
- self.prec = 100
- tree = tree[0]
- return tree
|