| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- """This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar.
- """
- from typing import Dict, Callable, Iterable, Optional
- from .lark import Lark
- from .tree import Tree, ParseTree
- from .visitors import Transformer_InPlace
- from .lexer import Token, PatternStr, TerminalDef
- from .grammar import Terminal, NonTerminal, Symbol
- from .tree_matcher import TreeMatcher, is_discarded_terminal
- from .utils import is_id_continue
def is_iter_empty(i):
    """Return True if iterator *i* has no more items, False otherwise.

    Note that the check is destructive: when the iterator is not empty,
    one item is pulled from it and thrown away.
    """
    # A private sentinel distinguishes "exhausted" from any real item,
    # including None or other falsy values.
    exhausted = object()
    return next(i, exhausted) is exhausted
class WriteTokensTransformer(Transformer_InPlace):
    """Inserts discarded tokens into their correct place, according to the rules of grammar.

    Attributes:
        tokens: terminal definitions, keyed by terminal name. Used to recover
            the literal text of a discarded terminal that has no substitution.
        term_subs: callables keyed by terminal name. Each is called with the
            discarded symbol and returns the text to write for it.
    """

    tokens: Dict[str, TerminalDef]
    term_subs: Dict[str, Callable[[Symbol], str]]

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
        self.tokens = tokens
        self.term_subs = term_subs

    def __default__(self, data, children, meta):
        """Rebuild the flat list of items to write for one node.

        If ``meta`` has no truthy ``match_tree`` attribute, the node is
        returned as a plain ``Tree(data, children)``. Otherwise the symbols in
        ``meta.orig_expansion`` are walked in order: discarded terminals
        contribute their text, every other symbol consumes the next child.

        Returns:
            A ``Tree`` (no ``match_tree`` flag), or a list mixing strings,
            tokens and subtrees in output order.

        Raises:
            NotImplementedError: a discarded terminal has no entry in
                ``term_subs`` and its pattern is not a plain string.
        """
        if not getattr(meta, 'match_tree', False):
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                # Only the lookup is guarded. A KeyError raised *inside* the
                # user's callback must propagate instead of being mistaken
                # for "no substitution registered".
                try:
                    substitute = self.term_subs[sym.name]
                except KeyError:
                    t = self.tokens[sym.name]
                    if not isinstance(t.pattern, PatternStr):
                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
                    v = t.pattern.value
                else:
                    v = substitute(sym)
                to_write.append(v)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    # A list child is already a flattened run of items:
                    # splice it in rather than nesting it.
                    to_write += x
                else:
                    # Sanity-check that the child matches the grammar symbol
                    # it is supposed to stand for.
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        # Every child must have been consumed by exactly one symbol.
        assert is_iter_empty(iter_args)
        return to_write
class Reconstructor(TreeMatcher):
    """
    Generates source text from a full parse Tree, using the parser's grammar.

    Note:
        Text cannot be produced for discarded terminals that are defined by a
        regexp. For those (newlines, for example), supply an entry in
        `term_subs` that provides the text to write.

    Parameters:
        parser: a Lark instance
        term_subs: a dictionary of [Terminal name as str] to
            [callable taking the terminal's Symbol and returning the output text as str]
    """

    write_tokens: WriteTokensTransformer

    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
        TreeMatcher.__init__(self, parser)

        # Index the terminal definitions by name for the token writer.
        terminals_by_name = {term.name: term for term in self.tokens}
        self.write_tokens = WriteTokensTransformer(terminals_by_name, term_subs or {})

    def _reconstruct(self, tree):
        """Lazily yield the output items for `tree`, descending into subtrees."""
        matched = self.match_tree(tree, tree.data)
        for piece in self.write_tokens.transform(matched):
            if not isinstance(piece, Tree):
                yield piece
                continue
            # TODO use orig_expansion.rulename to support templates
            yield from self._reconstruct(piece)

    def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
        """Return the text reconstructed from `tree`.

        Parameters:
            tree: the parse tree to turn back into text
            postproc: optional callable that receives the stream of output
                items and returns the stream that is actually written
            insert_spaces: when true, a single space is written between two
                neighbouring items if the first ends with, and the second
                starts with, an identifier character
        """
        pieces = self._reconstruct(tree)
        if postproc:
            pieces = postproc(pieces)

        out = []
        last = ''
        for piece in pieces:
            # Without a separator, two adjacent identifier-like items would
            # merge into one word.
            would_merge = (
                insert_spaces
                and last
                and piece
                and is_id_continue(last[-1])
                and is_id_continue(piece[0])
            )
            if would_merge:
                out.append(' ')
            out.append(piece)
            last = piece
        return ''.join(out)
|