# Copyright (c) 2022-2024 by Rocky Bernstein # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ Generators and comprehension functions """ from typing import Optional from spark_parser.ast import GenericASTTraversalPruningException from xdis import iscode from decompyle3.parsers.main import get_python_parser from decompyle3.scanner import Code from decompyle3.scanners.tok import Token from decompyle3.semantics.consts import PRECEDENCE from decompyle3.semantics.helper import is_lambda_mode class ComprehensionMixin: """ These functions hand nonterminal common actions that occur when encountering a generator or some sort of comprehension. What is common about these is that often the nonterminal has a code object whose decompilation needs to be melded into the resulting Python source code. In source code, the implicit function calls are not seen. """ def closure_walk(self, node, collection_index: int): """Dictionary and Set comprehensions using closures.""" p: int = self.prec code_index = 0 if node[0] == "load_genexpr" else 1 tree = self.get_comprehension_function(node, code_index=code_index) if tree.kind in ("stmts", "lambda_start"): tree = tree[0] # Remove single reductions as in ("stmts", "sstmt"): # FIXME: when parse_tree -> ast_transform is done we should # not need this. while len(tree) == 1 or tree.kind in ("return_expr_lambda",): tree = tree[0] if tree == "genexpr_func_async": store = tree[2] iter_index = 3 collection_index = 3 elif tree in ("genexpr_func", "dict_comp_func", "set_comp_func"): store = tree[3] iter_index = 4 elif tree == "set_comp": tree = tree[1][0] assert tree == "set_for", tree.kind store = tree[2] iter_index = 3 else: store = tree[4] iter_index = 5 if node[collection_index] == "get_iter": expr = node[collection_index][0] assert expr == "expr", expr.kind collection = expr else: collection = node[collection_index] n = tree[iter_index] if_condition = None write_if = False assert n in ("comp_iter", "set_iter") # Find inner-most body node. while n == "comp_iter": n = n[0] # recurse one step if n in ("list_for", "comp_for"): store = n[2] n = n[3] elif n[0].kind == "c_compare": if_condition = n n = n[-1] elif n in ( "comp_if", "comp_if_not", "list_if", "list_if_and_or", "list_if_not", "list_if_or_not", ): # FIXME: most of the grammar start with expr_... # Some of the older ones can be: expr # This may disappear though. if n[0].kind == "expr": if_condition = n n = n[-1] elif n[0].kind in ("expr_pjif", "expr_pjiff"): if_condition = n n = n[-1] assert n == "comp_iter" elif n[0].kind in ("or_jump_if_false_cf", "or_jump_if_false_loop_cf"): if_condition = n[1] n = n[1] else: if len(n) == 2: if_condition = n[0] n = n[1] else: if_condition = n[1] n = n[2] pass pass assert n in ("comp_body", "set_iter"), n.kind self.preorder(n[0]) if node == "generator_exp_async": self.write(" async") self.write(" for ") self.preorder(store) self.write(" in ") self.preorder(collection) if if_condition: if write_if: self.write(" if ") self.preorder(if_condition) self.prec = p def comprehension_walk( self, node, iter_index: Optional[int], ): p: int = self.prec self.prec = PRECEDENCE["lambda_body"] - 1 # FIXME: clean this up if node == "dict_comp": cn = node[1] elif node in ("generator_exp", "generator_exp_async"): if node[0] == "load_genexpr": load_genexpr = node[0] elif node[1] == "load_genexpr": load_genexpr = node[1] cn = load_genexpr[0] else: if len(node[1]) > 1 and hasattr(node[1][1], "attr"): # Python 3.3+ does this cn = node[1][1] else: assert False, "Can't find code for comprehension" assert iscode(cn.attr) code = Code(cn.attr, self.scanner, self.currentclass, self.debug_opts["asm"]) # FIXME: is there a way we can avoid this? # The problem is that in filter in top-level list comprehensions we can # encounter comprehensions of other kinds, and lambdas if is_lambda_mode(self.compile_mode): p_save = self.p self.p = get_python_parser( self.version, compile_mode="exec", is_pypy=self.is_pypy, ) tree = self.build_ast(code._tokens, code._customize, code) self.p = p_save else: tree = self.build_ast(code._tokens, code._customize, code) self.customize(code._customize) # Remove single reductions as in ("stmts", "sstmt"): while len(tree) == 1: tree = tree[0] if tree == "stmts": # FIXME: rest is a return None? # Verify this # rest = tree[1:] tree = tree[0] elif tree == "lambda_start": assert len(tree) <= 3 tree = tree[-2] if tree == "return_expr_lambda": tree = tree[1] pass if tree in ("genexpr_func_async", "genexpr_func"): for n in tree: if n.kind == "comp_iter": break pass else: n = tree[iter_index] assert n == "comp_iter", n if_condition = None write_if = False # Find the comprehension body. It is the inner-most # node that is not list_.. . while n == "comp_iter": # list_iter n = n[0] # recurse one step if n == "comp_for": if n[0] == "SETUP_LOOP": n = n[4] else: n = n[3] elif n == "comp_if": n = n[1] elif n in ( "comp_if_not", "comp_if_not_and", "comp_if_not_or", "comp_if_or", ): if_condition = n write_if = True n = n[-1] assert n == "comp_iter" assert n == "comp_body", n.kind self.preorder(n[0]) if node == "generator_exp_async": self.write(" async") iter_var_index = 2 else: iter_var_index = iter_index if tree[iter_var_index] != "store": iter_var_index = iter_index - 1 self.write(" for ") self.preorder(tree[iter_var_index]) self.write(" in ") if node[2] == "expr": iter_expr = node[2] elif node[3] == "get_aiter": iter_expr = node[3] else: iter_expr = node[-3] assert iter_expr in ("expr", "get_aiter"), iter_expr self.preorder(iter_expr) if if_condition and not tree[iter_index][0].kind.startswith("comp_if"): if write_if: self.write(" if ") self.preorder(if_condition) self.prec = p def comprehension_walk_newer( self, node, iter_index: Optional[int], code_index: int = -5, collection_node=None, ): """Non-closure-based comprehensions. Note: there are also other comprehensions. """ # FIXME: DRY with listcomp_closure3 p = self.prec self.prec = PRECEDENCE["lambda_body"] - 1 # FIXME? Nonterminals in grammar maybe should be split out better? # Maybe test on self.compile_mode? if ( isinstance(node[0], Token) and node[0].kind.startswith("LOAD") and iscode(node[0].attr) ): if node[3] == "get_aiter": compile_mode = self.compile_mode self.compile_mode = "genexpr" is_lambda = self.is_lambda self.is_lambda = True try: tree = self.get_comprehension_function(node, code_index) except GenericASTTraversalPruningException: pass self.compile_mode = compile_mode self.is_lambda = is_lambda else: tree = self.get_comprehension_function(node, code_index) elif ( len(node) > 2 and isinstance(node[2], Token) and node[2].kind.startswith("LOAD") and iscode(node[2].attr) ): tree = self.get_comprehension_function(node, 2) else: tree = node # Pick out important parts of the comprehension: # * the variable we iterate over: "store" # * the results we accumulate: "n" store = None if tree.kind == "genexpr_func_async": genexpr_func_async = tree elif tree.kind == "set_iter": # Not sure if this is correct node = tree = tree[0] elif tree.kind != "genexpr_func": # Not sure if this is still correct genexpr_func_async = tree[1] collection_node_index = None if node == "list_comp_async": # We have two different kinds of grammar rules: # list_comp_async ::= LOAD_LISTCOMP LOAD_STR MAKE_FUNCTION_0 expr ... # and: # list_comp_async ::= BUILD_LIST_0 LOAD_ARG list_afor2 if tree[0] == "expr" and tree[0][0] == "list_comp_async": tree = tree[0][0] # PyPy 3.8 has LOAD_ARG if tree[0] in ("BUILD_LIST_0", "LOAD_ARG"): list_afor2 = tree[2] assert list_afor2 == "list_afor2" store = list_afor2[1] assert store == "store" n = list_afor2[3] if list_afor2[3] == "list_iter" else list_afor2[2] collection_node_index = 1 else: # ??? pass elif node.kind in ("dict_comp_async", "set_comp_async"): # We have two different kinds of grammar rules: # dict_comp_async ::= LOAD_DICTCOMP LOAD_STR MAKE_FUNCTION_0 expr ... # set_comp_async ::= LOAD_SETCOMP LOAD_STR MAKE_FUNCTION_0 expr ... # and: # dict_comp_async ::= BUILD_MAP_0 LOAD_ARG genexpr_func_async # set_comp_async ::= BUILD_SET_0 LOAG_ARG genexpr_func_async if tree[0] == "expr": tree = tree[0] if tree[0].kind in ("BUILD_MAP_0", "BUILD_SET_0"): genexpr_func_async = tree[1] if genexpr_func_async == "genexpr_func_async": store = genexpr_func_async[2] assert store.kind.startswith("store") n = genexpr_func_async[3] else: set_afor2 = genexpr_func_async assert set_afor2 == "set_afor2" n = set_afor2[1] store = n[1] collection_node = node[3] else: # ??? pass elif node == "list_afor": collection_node = node[0] list_afor2 = node[1] assert list_afor2 == "list_afor2" store = list_afor2[1] assert store == "store" n = list_afor2[2] elif node == "set_afor2": collection_node = node[0] set_iter_async = node[1] assert set_iter_async == "set_iter_async" store = set_iter_async[1] assert store == "store" n = set_iter_async[2] elif node == "list_comp" and tree[0] == "expr": tree = tree[0][0] n = tree[iter_index] elif node == "set_comp" and tree[1] == "set_iter": n = tree[1] else: for k in tree: if k.kind in ("comp_iter", "list_iter", "set_iter", "lc_body"): n = k break pass else: n = tree[iter_index] if tree in ( "dict_comp_func", "genexpr_func", "genexpr_func_async", "generator_exp", "list_comp", "set_comp", "set_comp_func", "set_comp_func_header", ): # Find location of store for k in tree: if k.kind in ("comp_iter", "list_iter", "set_iter", "await_expr"): n = k elif k == "store": store = k break pass pass elif tree.kind in ("list_comp_async", "dict_comp_async", "set_afor2"): # Handled this condition above. pass else: # FIXME: we get this when we parse lambda's explicitly. # And here we've already printed/handled the list comprehension # this iteration is duplicate in seeing the list-comprehension code # item again. Is this a larger duplicate parsing problem? # Not sure what the best this thing to do is. # try: # n # except: # from trepan.api import debug; debug() if n.kind in ("RETURN_VALUE_LAMBDA", "return_expr_lambda"): self.prune() assert n.kind in ("list_iter", "comp_iter", "set_iter", "set_iter_async"), n # FIXME: I'm not totally sure this is right. # Find the list comprehension body. It is the inner-most # node that is not list_.. . if_nodes = [] if_node_parent = None comp_for = None comp_store = None if n == "comp_iter": comp_for = n if not store: comp_store = tree[3] # Iterate to find the inner-most comprehension body. # We'll come back to the list iteration below. while n in ( "comp_iter", "list_afor", "list_afor2", "list_iter", "set_afor", "set_afor2", "set_iter", "set_iter_async", ): # iterate one nesting deeper if n in ("list_afor", "set_afor"): n = n[1] elif n in ("list_afor2", "set_afor2", "set_iter_async"): if n[1] == "store": store = n[1] n = n[3] if n[3] in ("list_iter", "set_iter") else n[2] else: n = n[0] if n in ("comp_for", "list_for", "set_for"): collection_node = n if n[2] == "store" and not store: store = n[2] if not comp_store: comp_store = store n = n[3] assert n.kind in ("comp_iter", "list_iter", "set_iter") elif n in ("list_if_chained",): # list_if_chained ::= list_if_compare ... list_iter if_nodes.append(n[0]) assert n[0] == "list_if_compare" n = n[-1] assert n == "list_iter" elif n in ( "comp_if_not_and", "comp_if_or", "comp_if_or2", "comp_if_or_not", "comp_if_not_or", ): if_nodes.append(n[0]) n = n[-1] assert n == "comp_iter" elif n in ( "list_if", "list_if_not", "list_if37", "list_if37_not", "comp_if", "comp_if_not", ): if n in ("list_if37", "list_if37_not", "comp_if"): if n in ("comp_if", "list_if37", "list_if"): if_nodes.append(n[0]) n = n[1] else: if n in ("comp_if_not",): if_nodes.append(n) else: if_node_parent = n if_nodes.append(n[0]) if n[1] == "store": store = n[1] n = n[-2] if n[-1] == "come_from_opt" else n[-1] pass elif n.kind == "list_if_and_or": if_nodes.append(n[-1][0]) n = n[-1] assert n == "list_iter" pass # Python 2.7+ starts including set_comp_body # Python 3.5+ starts including set_comp_func assert store, "Couldn't find store in list/set comprehension" # A problem created with later Python code generation is that there # is a lambda set up with a dummy argument name that is then called # So we can't just translate that as is but need to replace the # dummy name. Below we are picking out the variable name as seen # in the code. And trying to generate code for the other parts # that don't have the dummy argument name in it. # Another approach might be to be able to pass in the source name # for the dummy argument. if node not in ("list_afor", "set_afor"): # FIXME decompile_cfg doesn't have to do this. Find out why. self.preorder(n if n.kind in ("await_expr", "LOAD_ARG") else n[0]) if node.kind in ( "dict_comp_async", "genexpr_func_async", "list_afor", "list_comp_async", "set_afor2", "set_comp_async", ): self.write(" async") # For listcomp, setcomp, etc., the collection is .0 and that's the best we # can do. So don't try to find the collection node. if not self.compile_mode.endswith("comp"): collection_node_index = None if collection_node_index is None: for i, child in enumerate(node): if child.kind in ( "expr", "expr_get_aiter", "get_aiter", "get_iter", ): collection_node_index = i break assert collection_node_index is not None elif len(node) >= 3 and node[3] == "expr": collection_node_index = 3 collection_node = node[3] assert collection_node == "expr" elif node == "comp_body": collection_node = node elif node.kind == "genexpr_func": collection_node_index = 0 elif collection_node is None: collection_node_index = -3 self.write(" for ") if comp_store: self.preorder(comp_store) # We have already added the comp_store contribution, # don't attempt to decompile it again comp_store = None else: self.preorder(store) self.write(" in ") if node == "list_afor": list_afor2 = node[1] assert list_afor2 == "list_afor2" list_iter = list_afor2[2] assert list_iter == "list_iter" self.preorder(collection_node) elif node == "set_comp_async": self.preorder(collection_node) elif node == "list_comp_async": self.preorder(node[collection_node_index]) elif is_lambda_mode(self.compile_mode) and collection_node_index is None: if node in ("list_comp_async",): self.preorder(node[1]) elif collection_node is None: assert node[3] in ("get_aiter", "get_iter"), node[3].kind self.preorder(node[3]) else: self.preorder(collection_node) else: if not collection_node: if node[3] == "set_iter": self.preorder(tree[0]) collection_node = node[collection_node_index] self.preorder(collection_node) # Here is where we handle nested list iterations which # includes their corresponding "if" conditions. if tree in ("list_comp", "set_comp") and not self.is_pypy: list_iter = tree[1] assert list_iter in ("list_iter", "set_iter") list_for = list_iter[0] if list_for in ("list_for", "set_for"): # In the grammar we have: # list_for ::= _ for_iter store list_iter ... # or # set_for ::= _ set_iter store set_iter ... list_iter_inner = list_for[3] assert list_iter_inner in ("list_iter", "set_iter") # If we have set_comp_body, we've done this above. if not ( list_iter_inner == "set_iter" and list_iter_inner[0] == "set_comp_body" ): self.preorder(list_iter_inner) if if_node_parent == list_iter_inner[0]: self.prec = p return comp_store = None pass elif tree == "set_comp_func": # Handle nested comp_for iterations. comp_iter = tree[4] assert comp_iter in ("comp_iter", "await_expr") while comp_iter == "comp_iter": comp_for = comp_iter[0] if comp_for != "comp_for": break self.preorder(comp_iter) if len(comp_for) < 4: break comp_iter = comp_for[3] assert comp_store is None if comp_store: self.preorder(comp_store) for if_node in if_nodes: if if_node != "comp_if_or": self.write(" if ") if if_node in ( "c_compare_chained37_false", "comp_if_not_and", "comp_if_not_or", "comp_if_or", "comp_if_or2", "comp_if_or_not", ): self.preorder(if_node) else: # FIXME: go over these to add more of this in the template, # not here. if if_node in ( "comp_if_not", "list_if37_not", "list_if_not", "list_if_or_not", ): self.write("not ") pass self.prec = PRECEDENCE["lambda_body"] - 1 self.preorder(if_node[0]) pass self.prec = p def get_comprehension_function(self, node, code_index: int): """ Build the body of a comprehension function and then find the comprehension node buried in the tree which may be surrounded with start-like symbols or dominiators,. """ self.prec = PRECEDENCE["lambda_body"] - 1 code_node = node[code_index] if code_node == "load_genexpr": code_node = code_node[0] code_obj = code_node.attr assert iscode(code_obj), code_node code = Code(code_obj, self.scanner, self.currentclass, self.debug_opts["asm"]) # FIXME: is there a way we can avoid this? # The problem is that in filter in top-level list comprehensions we can # encounter comprehensions of other kinds, and lambdas if is_lambda_mode(self.compile_mode): p_save = self.p self.p = get_python_parser( self.version, compile_mode="exec", is_pypy=self.is_pypy, ) tree = self.build_ast( code._tokens, code._customize, code, is_lambda=self.is_lambda ) self.p = p_save else: tree = self.build_ast( code._tokens, code._customize, code, is_lambda=self.is_lambda ) self.customize(code._customize) # skip over: sstmt, stmt, return, return_expr # and other singleton derivations if tree == "lambda_start": tree = tree[0] while len(tree) == 1 or ( tree in ("stmt", "sstmt", "return", "return_expr", "return_expr_lambda") ): self.prec = 100 tree = tree[0] return tree