parse_tree_builder.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. """Provides functions for the automatic building and shaping of the parse-tree."""
  2. from typing import List
  3. from .exceptions import GrammarError, ConfigurationError
  4. from .lexer import Token
  5. from .tree import Tree
  6. from .visitors import Transformer_InPlace
  7. from .visitors import _vargs_meta, _vargs_meta_inline
  8. ###{standalone
  9. from functools import partial, wraps
  10. from itertools import product
  11. class ExpandSingleChild:
  12. def __init__(self, node_builder):
  13. self.node_builder = node_builder
  14. def __call__(self, children):
  15. if len(children) == 1:
  16. return children[0]
  17. else:
  18. return self.node_builder(children)
  19. class PropagatePositions:
  20. def __init__(self, node_builder, node_filter=None):
  21. self.node_builder = node_builder
  22. self.node_filter = node_filter
  23. def __call__(self, children):
  24. res = self.node_builder(children)
  25. if isinstance(res, Tree):
  26. # Calculate positions while the tree is streaming, according to the rule:
  27. # - nodes start at the start of their first child's container,
  28. # and end at the end of their last child's container.
  29. # Containers are nodes that take up space in text, but have been inlined in the tree.
  30. res_meta = res.meta
  31. first_meta = self._pp_get_meta(children)
  32. if first_meta is not None:
  33. if not hasattr(res_meta, 'line'):
  34. # meta was already set, probably because the rule has been inlined (e.g. `?rule`)
  35. res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
  36. res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
  37. res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
  38. res_meta.empty = False
  39. res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
  40. res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
  41. res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
  42. last_meta = self._pp_get_meta(reversed(children))
  43. if last_meta is not None:
  44. if not hasattr(res_meta, 'end_line'):
  45. res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
  46. res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
  47. res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
  48. res_meta.empty = False
  49. res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
  50. res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
  51. res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
  52. return res
  53. def _pp_get_meta(self, children):
  54. for c in children:
  55. if self.node_filter is not None and not self.node_filter(c):
  56. continue
  57. if isinstance(c, Tree):
  58. if not c.meta.empty:
  59. return c.meta
  60. elif isinstance(c, Token):
  61. return c
  62. elif hasattr(c, '__lark_meta__'):
  63. return c.__lark_meta__()
  64. def make_propagate_positions(option):
  65. if callable(option):
  66. return partial(PropagatePositions, node_filter=option)
  67. elif option is True:
  68. return PropagatePositions
  69. elif option is False:
  70. return None
  71. raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
  72. class ChildFilter:
  73. def __init__(self, to_include, append_none, node_builder):
  74. self.node_builder = node_builder
  75. self.to_include = to_include
  76. self.append_none = append_none
  77. def __call__(self, children):
  78. filtered = []
  79. for i, to_expand, add_none in self.to_include:
  80. if add_none:
  81. filtered += [None] * add_none
  82. if to_expand:
  83. filtered += children[i].children
  84. else:
  85. filtered.append(children[i])
  86. if self.append_none:
  87. filtered += [None] * self.append_none
  88. return self.node_builder(filtered)
  89. class ChildFilterLALR(ChildFilter):
  90. """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
  91. def __call__(self, children):
  92. filtered = []
  93. for i, to_expand, add_none in self.to_include:
  94. if add_none:
  95. filtered += [None] * add_none
  96. if to_expand:
  97. if filtered:
  98. filtered += children[i].children
  99. else: # Optimize for left-recursion
  100. filtered = children[i].children
  101. else:
  102. filtered.append(children[i])
  103. if self.append_none:
  104. filtered += [None] * self.append_none
  105. return self.node_builder(filtered)
  106. class ChildFilterLALR_NoPlaceholders(ChildFilter):
  107. "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
  108. def __init__(self, to_include, node_builder):
  109. self.node_builder = node_builder
  110. self.to_include = to_include
  111. def __call__(self, children):
  112. filtered = []
  113. for i, to_expand in self.to_include:
  114. if to_expand:
  115. if filtered:
  116. filtered += children[i].children
  117. else: # Optimize for left-recursion
  118. filtered = children[i].children
  119. else:
  120. filtered.append(children[i])
  121. return self.node_builder(filtered)
  122. def _should_expand(sym):
  123. return not sym.is_term and sym.name.startswith('_')
  124. def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
  125. # Prepare empty_indices as: How many Nones to insert at each index?
  126. if _empty_indices:
  127. assert _empty_indices.count(False) == len(expansion)
  128. s = ''.join(str(int(b)) for b in _empty_indices)
  129. empty_indices = [len(ones) for ones in s.split('0')]
  130. assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
  131. else:
  132. empty_indices = [0] * (len(expansion)+1)
  133. to_include = []
  134. nones_to_add = 0
  135. for i, sym in enumerate(expansion):
  136. nones_to_add += empty_indices[i]
  137. if keep_all_tokens or not (sym.is_term and sym.filter_out):
  138. to_include.append((i, _should_expand(sym), nones_to_add))
  139. nones_to_add = 0
  140. nones_to_add += empty_indices[len(expansion)]
  141. if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
  142. if _empty_indices or ambiguous:
  143. return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
  144. else:
  145. # LALR without placeholders
  146. return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
  147. class AmbiguousExpander:
  148. """Deal with the case where we're expanding children ('_rule') into a parent but the children
  149. are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
  150. ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
  151. into the right parents in the right places, essentially shifting the ambiguity up the tree."""
  152. def __init__(self, to_expand, tree_class, node_builder):
  153. self.node_builder = node_builder
  154. self.tree_class = tree_class
  155. self.to_expand = to_expand
  156. def __call__(self, children):
  157. def _is_ambig_tree(t):
  158. return hasattr(t, 'data') and t.data == '_ambig'
  159. # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
  160. # All children of an _ambig node should be a derivation of that ambig node, hence
  161. # it is safe to assume that if we see an _ambig node nested within an ambig node
  162. # it is safe to simply expand it into the parent _ambig node as an alternative derivation.
  163. ambiguous = []
  164. for i, child in enumerate(children):
  165. if _is_ambig_tree(child):
  166. if i in self.to_expand:
  167. ambiguous.append(i)
  168. child.expand_kids_by_data('_ambig')
  169. if not ambiguous:
  170. return self.node_builder(children)
  171. expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)]
  172. return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)])
  173. def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
  174. to_expand = [i for i, sym in enumerate(expansion)
  175. if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
  176. if to_expand:
  177. return partial(AmbiguousExpander, to_expand, tree_class)
  178. class AmbiguousIntermediateExpander:
  179. """
  180. Propagate ambiguous intermediate nodes and their derivations up to the
  181. current rule.
  182. In general, converts
  183. rule
  184. _iambig
  185. _inter
  186. someChildren1
  187. ...
  188. _inter
  189. someChildren2
  190. ...
  191. someChildren3
  192. ...
  193. to
  194. _ambig
  195. rule
  196. someChildren1
  197. ...
  198. someChildren3
  199. ...
  200. rule
  201. someChildren2
  202. ...
  203. someChildren3
  204. ...
  205. rule
  206. childrenFromNestedIambigs
  207. ...
  208. someChildren3
  209. ...
  210. ...
  211. propagating up any nested '_iambig' nodes along the way.
  212. """
  213. def __init__(self, tree_class, node_builder):
  214. self.node_builder = node_builder
  215. self.tree_class = tree_class
  216. def __call__(self, children):
  217. def _is_iambig_tree(child):
  218. return hasattr(child, 'data') and child.data == '_iambig'
  219. def _collapse_iambig(children):
  220. """
  221. Recursively flatten the derivations of the parent of an '_iambig'
  222. node. Returns a list of '_inter' nodes guaranteed not
  223. to contain any nested '_iambig' nodes, or None if children does
  224. not contain an '_iambig' node.
  225. """
  226. # Due to the structure of the SPPF,
  227. # an '_iambig' node can only appear as the first child
  228. if children and _is_iambig_tree(children[0]):
  229. iambig_node = children[0]
  230. result = []
  231. for grandchild in iambig_node.children:
  232. collapsed = _collapse_iambig(grandchild.children)
  233. if collapsed:
  234. for child in collapsed:
  235. child.children += children[1:]
  236. result += collapsed
  237. else:
  238. new_tree = self.tree_class('_inter', grandchild.children + children[1:])
  239. result.append(new_tree)
  240. return result
  241. collapsed = _collapse_iambig(children)
  242. if collapsed:
  243. processed_nodes = [self.node_builder(c.children) for c in collapsed]
  244. return self.tree_class('_ambig', processed_nodes)
  245. return self.node_builder(children)
  246. def inplace_transformer(func):
  247. @wraps(func)
  248. def f(children):
  249. # function name in a Transformer is a rule name.
  250. tree = Tree(func.__name__, children)
  251. return func(tree)
  252. return f
  253. def apply_visit_wrapper(func, name, wrapper):
  254. if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
  255. raise NotImplementedError("Meta args not supported for internal transformer; use YourTransformer().transform(parser.parse()) instead")
  256. @wraps(func)
  257. def f(children):
  258. return wrapper(func, name, children, None)
  259. return f
  260. class ParseTreeBuilder:
  261. def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
  262. self.tree_class = tree_class
  263. self.propagate_positions = propagate_positions
  264. self.ambiguous = ambiguous
  265. self.maybe_placeholders = maybe_placeholders
  266. self.rule_builders = list(self._init_builders(rules))
  267. def _init_builders(self, rules):
  268. propagate_positions = make_propagate_positions(self.propagate_positions)
  269. for rule in rules:
  270. options = rule.options
  271. keep_all_tokens = options.keep_all_tokens
  272. expand_single_child = options.expand1
  273. wrapper_chain = list(filter(None, [
  274. (expand_single_child and not rule.alias) and ExpandSingleChild,
  275. maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
  276. propagate_positions,
  277. self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
  278. self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
  279. ]))
  280. yield rule, wrapper_chain
  281. def create_callback(self, transformer=None):
  282. callbacks = {}
  283. default_handler = getattr(transformer, '__default__', None)
  284. if default_handler:
  285. def default_callback(data, children):
  286. return default_handler(data, children, None)
  287. else:
  288. default_callback = self.tree_class
  289. for rule, wrapper_chain in self.rule_builders:
  290. user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
  291. try:
  292. f = getattr(transformer, user_callback_name)
  293. wrapper = getattr(f, 'visit_wrapper', None)
  294. if wrapper is not None:
  295. f = apply_visit_wrapper(f, user_callback_name, wrapper)
  296. elif isinstance(transformer, Transformer_InPlace):
  297. f = inplace_transformer(f)
  298. except AttributeError:
  299. f = partial(default_callback, user_callback_name)
  300. for w in wrapper_chain:
  301. f = w(f)
  302. if rule in callbacks:
  303. raise GrammarError("Rule '%s' already exists" % (rule,))
  304. callbacks[rule] = f
  305. return callbacks
  306. ###}