reconstruct.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. """This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar.
  2. """
  3. from typing import Dict, Callable, Iterable, Optional
  4. from .lark import Lark
  5. from .tree import Tree, ParseTree
  6. from .visitors import Transformer_InPlace
  7. from .lexer import Token, PatternStr, TerminalDef
  8. from .grammar import Terminal, NonTerminal, Symbol
  9. from .tree_matcher import TreeMatcher, is_discarded_terminal
  10. from .utils import is_id_continue
  11. def is_iter_empty(i):
  12. try:
  13. _ = next(i)
  14. return False
  15. except StopIteration:
  16. return True
  17. class WriteTokensTransformer(Transformer_InPlace):
  18. "Inserts discarded tokens into their correct place, according to the rules of grammar"
  19. tokens: Dict[str, TerminalDef]
  20. term_subs: Dict[str, Callable[[Symbol], str]]
  21. def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
  22. self.tokens = tokens
  23. self.term_subs = term_subs
  24. def __default__(self, data, children, meta):
  25. if not getattr(meta, 'match_tree', False):
  26. return Tree(data, children)
  27. iter_args = iter(children)
  28. to_write = []
  29. for sym in meta.orig_expansion:
  30. if is_discarded_terminal(sym):
  31. try:
  32. v = self.term_subs[sym.name](sym)
  33. except KeyError:
  34. t = self.tokens[sym.name]
  35. if not isinstance(t.pattern, PatternStr):
  36. raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
  37. v = t.pattern.value
  38. to_write.append(v)
  39. else:
  40. x = next(iter_args)
  41. if isinstance(x, list):
  42. to_write += x
  43. else:
  44. if isinstance(x, Token):
  45. assert Terminal(x.type) == sym, x
  46. else:
  47. assert NonTerminal(x.data) == sym, (sym, x)
  48. to_write.append(x)
  49. assert is_iter_empty(iter_args)
  50. return to_write
  51. class Reconstructor(TreeMatcher):
  52. """
  53. A Reconstructor that will, given a full parse Tree, generate source code.
  54. Note:
  55. The reconstructor cannot generate values from regexps. If you need to produce discarded
  56. regexes, such as newlines, use `term_subs` and provide default values for them.
  57. Parameters:
  58. parser: a Lark instance
  59. term_subs: a dictionary of [Terminal name as str] to [output text as str]
  60. """
  61. write_tokens: WriteTokensTransformer
  62. def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
  63. TreeMatcher.__init__(self, parser)
  64. self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
  65. def _reconstruct(self, tree):
  66. unreduced_tree = self.match_tree(tree, tree.data)
  67. res = self.write_tokens.transform(unreduced_tree)
  68. for item in res:
  69. if isinstance(item, Tree):
  70. # TODO use orig_expansion.rulename to support templates
  71. yield from self._reconstruct(item)
  72. else:
  73. yield item
  74. def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
  75. x = self._reconstruct(tree)
  76. if postproc:
  77. x = postproc(x)
  78. y = []
  79. prev_item = ''
  80. for item in x:
  81. if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
  82. y.append(' ')
  83. y.append(item)
  84. prev_item = item
  85. return ''.join(y)