# earley_forest.py
  """This module implements a Shared Packed Parse Forest (SPPF).

  The SPPF is used as the primary output mechanism for the Earley parser
  in order to store complex ambiguities.

  Full reference and more details are here:
  https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
  """
  from typing import Type, AbstractSet
  from random import randint
  from collections import deque
  from operator import attrgetter
  from importlib import import_module
  from functools import partial

  from ..parse_tree_builder import AmbiguousIntermediateExpander
  from ..visitors import Discard
  from ..utils import logger, OrderedSet
  from ..tree import Tree
  class ForestNode:
      """Common base class of every node type stored in the forest.

      It carries no behaviour of its own; the forest visitor uses it with
      ``isinstance`` to tell a single node apart from an iterable of nodes.
      """
      pass
  class SymbolNode(ForestNode):
      """
      A Symbol Node represents a symbol (or Intermediate LR0).

      Symbol nodes are keyed by the symbol (s). For intermediate nodes
      s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
      nodes, s identifies the non-terminal origin (i.e. the left hand side of
      the rule); ``__repr__`` reads its ``name`` attribute.

      The children of a Symbol or Intermediate Node will always be Packed Nodes;
      with each Packed Node child representing a single derivation of a production.

      Hence a Symbol Node with a single child is unambiguous.

      Parameters:
          s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
          start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
          end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).

      Properties:
          is_intermediate: True if this node is an intermediate node.
          priority: The priority of the node's symbol.
      """
      # Container type used for ``_children`` and ``paths``.
      Set: Type[AbstractSet] = set   # Overridden by StableSymbolNode
      __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')

      def __init__(self, s, start, end):
          self.s = s
          self.start = start
          self.end = end
          self._children = self.Set()
          self.paths = self.Set()
          self.paths_loaded = False

          # We use inf here as it can be safely negated without resorting to conditionals,
          # unlike None or float('NaN'), and sorts appropriately.
          self.priority = float('-inf')
          # Intermediate nodes are keyed by a (rule, ptr) tuple.
          self.is_intermediate = isinstance(s, tuple)

      def add_family(self, lr0, rule, start, left, right):
          """Add one derivation of this symbol as a new ``PackedNode`` child."""
          self._children.add(PackedNode(self, lr0, rule, start, left, right))

      def add_path(self, transitive, node):
          """Record a ``(transitive, node)`` pair to be expanded by ``load_paths``."""
          self.paths.add((transitive, node))

      def load_paths(self):
          """Expand every recorded path into a packed-node family.

          When the transitive item has a ``next_titem``, a new node of the
          same type as ``self`` is created for it (ending at ``self.end``),
          the path is forwarded to that node, and the new node becomes the
          right child of the family. Otherwise ``node`` itself is used as
          the right child. ``paths_loaded`` is set once all paths are handled.
          """
          for transitive, node in self.paths:
              reduction = transitive.reduction
              if transitive.next_titem is not None:
                  vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
                  vn.add_path(transitive.next_titem, node)
                  self.add_family(reduction.rule.origin, reduction.rule, reduction.start, reduction.node, vn)
              else:
                  self.add_family(reduction.rule.origin, reduction.rule, reduction.start, reduction.node, node)
          self.paths_loaded = True

      @property
      def is_ambiguous(self):
          """Returns True if this node is ambiguous."""
          return len(self.children) > 1

      @property
      def children(self):
          """Returns a list of this node's children sorted from greatest to
          least priority."""
          # Paths are expanded lazily, on first access to the children.
          if not self.paths_loaded:
              self.load_paths()
          return sorted(self._children, key=attrgetter('sort_key'))

      def __iter__(self):
          # Iterates the raw child container: unsorted, and without loading paths.
          return iter(self._children)

      def __repr__(self):
          if self.is_intermediate:
              rule = self.s[0]
              ptr = self.s[1]
              before = (expansion.name for expansion in rule.expansion[:ptr])
              after = (expansion.name for expansion in rule.expansion[ptr:])
              symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
          else:
              symbol = self.s.name
          return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
  class StableSymbolNode(SymbolNode):
      "A version of SymbolNode that uses OrderedSet for output stability"
      # Replaces the plain ``set`` used by SymbolNode for children and paths.
      Set = OrderedSet
  class PackedNode(ForestNode):
      """
      A Packed Node represents a single derivation in a symbol node.

      Parameters:
          rule: The rule associated with this node.
          parent: The parent of this node.
          left: The left child of this node. ``None`` if one does not exist.
          right: The right child of this node. ``None`` if one does not exist.
          priority: The priority of this node.
      """
      __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')

      def __init__(self, parent, s, rule, start, left, right):
          self.parent = parent
          self.s = s
          self.start = start
          self.rule = rule
          self.left = left
          self.right = right
          self.priority = float('-inf')
          # Identity of a packed node is defined by its two children only,
          # matching ``__eq__``; the hash is computed once up front.
          self._hash = hash((self.left, self.right))

      @property
      def is_empty(self):
          """True when the node has neither a left nor a right child."""
          return self.left is None and self.right is None

      @property
      def sort_key(self):
          """
          Used to sort PackedNode children of SymbolNodes.
          A SymbolNode has multiple PackedNodes if it matched
          ambiguously. Hence, we use the sort order to identify
          the order in which ambiguous children should be considered.
          """
          # Non-empty nodes first, then higher priority, then rule order.
          return self.is_empty, -self.priority, self.rule.order

      @property
      def children(self):
          """Returns a list of this node's children."""
          return [x for x in [self.left, self.right] if x is not None]

      def __iter__(self):
          # Unlike ``children``, this yields ``None`` for a missing child.
          yield self.left
          yield self.right

      def __eq__(self, other):
          if not isinstance(other, PackedNode):
              return False
          return self is other or (self.left == other.left and self.right == other.right)

      def __hash__(self):
          return self._hash

      def __repr__(self):
          if isinstance(self.s, tuple):
              rule = self.s[0]
              ptr = self.s[1]
              before = (expansion.name for expansion in rule.expansion[:ptr])
              after = (expansion.name for expansion in rule.expansion[ptr:])
              symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
          else:
              symbol = self.s.name
          return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
  class TokenNode(ForestNode):
      """
      A Token Node represents a matched terminal and is always a leaf node.

      Parameters:
          token: The Token associated with this node.
          term: The TerminalDef matched by the token.
          priority: The priority of this node.
      """
      __slots__ = ('token', 'term', 'priority', '_hash')

      def __init__(self, token, term, priority=None):
          self.token = token
          self.term = term
          if priority is not None:
              self.priority = priority
          else:
              # Fall back to the terminal's own priority, or 0 without a terminal.
              self.priority = term.priority if term is not None else 0
          # Equality and hashing are both based on the token alone.
          self._hash = hash(token)

      def __eq__(self, other):
          if not isinstance(other, TokenNode):
              return False
          return self is other or (self.token == other.token)

      def __hash__(self):
          return self._hash

      def __repr__(self):
          return repr(self.token)
  class ForestVisitor:
      """
      An abstract base class for building forest visitors.

      This class performs a controllable depth-first walk of an SPPF.
      The visitor will not enter cycles and will backtrack if one is encountered.
      Subclasses are notified of cycles through the ``on_cycle`` method.

      Behavior for visit events is defined by overriding the
      ``visit*node*`` functions.

      The walk is controlled by the return values of the ``visit*node_in``
      methods. Returning a node(s) will schedule them to be visited. The visitor
      will begin to backtrack if no nodes are returned.

      Parameters:
          single_visit: If ``True``, non-Token nodes will only be visited once.
      """

      def __init__(self, single_visit=False):
          self.single_visit = single_visit

      def visit_token_node(self, node):
          """Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
          pass

      def visit_symbol_node_in(self, node):
          """Called when a symbol node is visited. Nodes that are returned
          will be scheduled to be visited. If ``visit_intermediate_node_in``
          is not implemented, this function will be called for intermediate
          nodes as well."""
          pass

      def visit_symbol_node_out(self, node):
          """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
          call have been visited. If ``visit_intermediate_node_out``
          is not implemented, this function will be called for intermediate
          nodes as well."""
          pass

      def visit_packed_node_in(self, node):
          """Called when a packed node is visited. Nodes that are returned
          will be scheduled to be visited. """
          pass

      def visit_packed_node_out(self, node):
          """Called after all nodes returned from a corresponding ``visit_packed_node_in``
          call have been visited."""
          pass

      def on_cycle(self, node, path):
          """Called when a cycle is encountered.

          Parameters:
              node: The node that causes a cycle.
              path: The list of nodes being visited: nodes that have been
                  entered but not exited. The first element is the root in a forest
                  visit, and the last element is the node visited most recently.
                  ``path`` should be treated as read-only.
          """
          pass

      def get_cycle_in_path(self, node, path):
          """A utility function for use in ``on_cycle`` to obtain a slice of
          ``path`` that only contains the nodes that make up the cycle."""
          # Scan backwards for the most recent occurrence of ``node`` (by identity).
          index = len(path) - 1
          while id(path[index]) != id(node):
              index -= 1
          return path[index:]

      def visit(self, root):
          """Walk the forest rooted at ``root``, firing the ``visit_*`` callbacks."""
          # Visiting is a list of IDs of all symbol/intermediate nodes currently in
          # the stack. It serves two purposes: to detect when we 'recurse' in and out
          # of a symbol/intermediate so that we can process both up and down. Also,
          # since the SPPF can have cycles it allows us to detect if we're trying
          # to recurse into a node that's already on the stack (infinite recursion).
          visiting = set()

          # set of all nodes that have been visited
          visited = set()

          # a list of nodes that are currently being visited
          # used for the `on_cycle` callback
          path = []

          # We do not use recursion here to walk the Forest due to the limited
          # stack size in python. Therefore input_stack is essentially our stack.
          # It holds both nodes and iterators over nodes.
          input_stack = deque([root])

          # It is much faster to cache these as locals since they are called
          # many times in large parses.
          vpno = getattr(self, 'visit_packed_node_out')
          vpni = getattr(self, 'visit_packed_node_in')
          vsno = getattr(self, 'visit_symbol_node_out')
          vsni = getattr(self, 'visit_symbol_node_in')
          # Intermediate handlers are optional and default to the symbol handlers.
          vino = getattr(self, 'visit_intermediate_node_out', vsno)
          vini = getattr(self, 'visit_intermediate_node_in', vsni)
          vtn = getattr(self, 'visit_token_node')
          oc = getattr(self, 'on_cycle')

          while input_stack:
              current = input_stack[-1]
              try:
                  next_node = next(current)
              except StopIteration:
                  # The iterator on top of the stack is exhausted.
                  input_stack.pop()
                  continue
              except TypeError:
                  # If the current object is not an iterator, pass through to Token/SymbolNode
                  pass
              else:
                  # ``current`` is an iterator and produced its next element.
                  if next_node is None:
                      continue

                  if id(next_node) in visiting:
                      oc(next_node, path)
                      continue

                  input_stack.append(next_node)
                  continue

              if isinstance(current, TokenNode):
                  # Token callbacks receive the token itself, not the TokenNode.
                  vtn(current.token)
                  input_stack.pop()
                  continue

              current_id = id(current)
              if current_id in visiting:
                  # Second encounter on the stack: everything scheduled by the
                  # "in" callback has been processed, so fire the "out" callback.
                  if isinstance(current, PackedNode):
                      vpno(current)
                  elif current.is_intermediate:
                      vino(current)
                  else:
                      vsno(current)
                  input_stack.pop()
                  path.pop()
                  visiting.remove(current_id)
                  visited.add(current_id)
              elif self.single_visit and current_id in visited:
                  input_stack.pop()
              else:
                  # First encounter: mark as in progress and fire the "in" callback.
                  visiting.add(current_id)
                  path.append(current)
                  if isinstance(current, PackedNode):
                      next_node = vpni(current)
                  elif current.is_intermediate:
                      next_node = vini(current)
                  else:
                      next_node = vsni(current)
                  if next_node is None:
                      continue

                  if not isinstance(next_node, ForestNode):
                      # An iterable of nodes was returned; it is consumed lazily.
                      next_node = iter(next_node)
                  elif id(next_node) in visiting:
                      oc(next_node, path)
                      continue

                  input_stack.append(next_node)
  class ForestTransformer(ForestVisitor):
      """The base class for a bottom-up forest transformation. Most users will
      want to use ``TreeForestTransformer`` instead as it has a friendlier
      interface and covers most use cases.

      Transformations are applied via inheritance and overriding of the
      ``transform*node`` methods.

      ``transform_token_node`` receives a ``Token`` as an argument.
      All other methods receive the node that is being transformed and
      a list of the results of the transformations of that node's children.
      The return value of these methods are the resulting transformations.

      If ``Discard`` is returned from a node's transformation, no data from that
      node will be passed to its parent's transformation.
      """

      def __init__(self):
          super(ForestTransformer, self).__init__()
          # results of transformations, keyed by id(node) (plus the 'result' slot)
          self.data = dict()
          # used to track parent nodes
          self.node_stack = deque()

      def transform(self, root):
          """Perform a transformation on an SPPF.

          Returns the single transformed result for ``root``, or ``None`` when
          the root's transformation was discarded.
          """
          # 'result' acts as the pseudo-parent that collects the root's output.
          self.node_stack.append('result')
          self.data['result'] = []
          self.visit(root)
          assert len(self.data['result']) <= 1
          if self.data['result']:
              return self.data['result'][0]
          return None

      def transform_symbol_node(self, node, data):
          """Transform a symbol node."""
          return node

      def transform_intermediate_node(self, node, data):
          """Transform an intermediate node."""
          return node

      def transform_packed_node(self, node, data):
          """Transform a packed node."""
          return node

      def transform_token_node(self, node):
          """Transform a ``Token``."""
          return node

      def visit_symbol_node_in(self, node):
          # Open a fresh result list for this node and descend into its children.
          self.node_stack.append(id(node))
          self.data[id(node)] = []
          return node.children

      def visit_packed_node_in(self, node):
          self.node_stack.append(id(node))
          self.data[id(node)] = []
          return node.children

      def visit_token_node(self, node):
          transformed = self.transform_token_node(node)
          if transformed is not Discard:
              # Hand the result to whichever node is currently on top of the stack.
              self.data[self.node_stack[-1]].append(transformed)

      def _visit_node_out_helper(self, node, method):
          """Apply ``method`` to ``node`` and pass the result to its parent."""
          # Pop first so that node_stack[-1] is the parent of ``node``.
          self.node_stack.pop()
          transformed = method(node, self.data[id(node)])
          if transformed is not Discard:
              self.data[self.node_stack[-1]].append(transformed)
          del self.data[id(node)]

      def visit_symbol_node_out(self, node):
          self._visit_node_out_helper(node, self.transform_symbol_node)

      def visit_intermediate_node_out(self, node):
          self._visit_node_out_helper(node, self.transform_intermediate_node)

      def visit_packed_node_out(self, node):
          self._visit_node_out_helper(node, self.transform_packed_node)
  class ForestSumVisitor(ForestVisitor):
      """
      A visitor for prioritizing ambiguous parts of the Forest.

      This visitor is used when support for explicit priorities on
      rules is requested (whether normal, or invert). It walks the
      forest (or subsets thereof) and cascades properties upwards
      from the leaves.

      It would be ideal to do this during parsing, however this would
      require processing each Earley item multiple times. That's
      a big performance drawback; so running a forest walk is the
      lesser of two evils: there can be significantly more Earley
      items created during parsing than there are SPPF nodes in the
      final tree.
      """

      def __init__(self):
          super(ForestSumVisitor, self).__init__(single_visit=True)

      def visit_packed_node_in(self, node):
          yield node.left
          yield node.right

      def visit_symbol_node_in(self, node):
          return iter(node.children)

      def visit_packed_node_out(self, node):
          # The rule's own priority only counts when the parent is a completed
          # symbol node (not an intermediate one) and the priority is truthy.
          priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
          # A missing child (None) has no ``priority`` attribute and contributes 0.
          priority += getattr(node.right, 'priority', 0)
          priority += getattr(node.left, 'priority', 0)
          node.priority = priority

      def visit_symbol_node_out(self, node):
          # A symbol node takes the best priority among its derivations.
          node.priority = max(child.priority for child in node.children)
  class PackedData():
      """Used in transformations of packed nodes to distinguish the data
      that comes from the left child and the right child.

      Parameters:
          node: The packed node; only ``node.left`` is inspected.
          data: The list of transformed child results (zero, one or two items).

      ``left`` and ``right`` are set to ``NO_DATA`` when the corresponding
      child produced nothing.
      """

      class _NoData():
          pass

      # Sentinel, so that ``None`` remains usable as a real transformation result.
      NO_DATA = _NoData()

      def __init__(self, node, data):
          self.left = self.NO_DATA
          self.right = self.NO_DATA
          if data:
              if node.left is not None:
                  self.left = data[0]
                  if len(data) > 1:
                      self.right = data[1]
              else:
                  # Without a left child, the only entry belongs to the right child.
                  self.right = data[0]
  class ForestToParseTree(ForestTransformer):
      """Used by the earley parser when ambiguity equals 'resolve' or
      'explicit'. Transforms an SPPF into an (ambiguous) parse tree.

      Parameters:
          tree_class: The tree class to use for construction
          callbacks: A dictionary of rules to functions that output a tree
          prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
          resolve_ambiguity: If True, ambiguities will be resolved based on
              priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
          use_cache: If True, the results of packed node transformations will be cached.

      Note:
          The default ``callbacks`` dict and the default ``prioritizer`` are
          created once, when the class is defined, and are shared by every
          instance that relies on the defaults.
      """

      def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
          super(ForestToParseTree, self).__init__()
          self.tree_class = tree_class
          self.callbacks = callbacks
          self.prioritizer = prioritizer
          self.resolve_ambiguity = resolve_ambiguity
          self._use_cache = use_cache
          # Packed-node results, keyed by id(node); reset after every visit().
          self._cache = {}
          # Set by on_cycle(); cleared again in _check_cycle()/visit_packed_node_in().
          self._on_cycle_retreat = False
          self._cycle_node = None
          # ids of symbol/intermediate nodes with a successfully visited packed child
          self._successful_visits = set()

      def visit(self, root):
          # A falsy prioritizer (e.g. None) skips the prioritizing pass.
          if self.prioritizer:
              self.prioritizer.visit(root)
          super(ForestToParseTree, self).visit(root)
          self._cache = {}

      def on_cycle(self, node, path):
          logger.debug("Cycle encountered in the SPPF at node: %s. "
                       "As infinite ambiguities cannot be represented in a tree, "
                       "this family of derivations will be discarded.", node)
          self._cycle_node = node
          self._on_cycle_retreat = True

      def _check_cycle(self, node):
          """Return ``Discard`` while retreating from a cycle, else ``None``.

          The retreat ends at the node that caused the cycle, or at any node
          recorded in ``_successful_visits``.
          """
          if self._on_cycle_retreat:
              if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
                  self._cycle_node = None
                  self._on_cycle_retreat = False
              else:
                  return Discard
          return None

      def _collapse_ambig(self, children):
          """Splice the children of any nested ``_ambig`` tree into the list."""
          new_children = []
          for child in children:
              if hasattr(child, 'data') and child.data == '_ambig':
                  new_children += child.children
              else:
                  new_children.append(child)
          return new_children

      def _call_rule_func(self, node, data):
          # called when transforming children of symbol nodes
          # data is a list of trees or tokens that correspond to the
          # symbol's rule expansion
          return self.callbacks[node.rule](data)

      def _call_ambig_func(self, node, data):
          # called when transforming a symbol node
          # data is a list of trees where each tree's data is
          # equal to the name of the symbol or one of its aliases.
          if len(data) > 1:
              return self.tree_class('_ambig', data)
          elif data:
              return data[0]
          return Discard

      def transform_symbol_node(self, node, data):
          # Nodes with no successfully visited derivation yield nothing.
          if id(node) not in self._successful_visits:
              return Discard
          r = self._check_cycle(node)
          if r is Discard:
              return r
          self._successful_visits.remove(id(node))
          data = self._collapse_ambig(data)
          return self._call_ambig_func(node, data)

      def transform_intermediate_node(self, node, data):
          if id(node) not in self._successful_visits:
              return Discard
          r = self._check_cycle(node)
          if r is Discard:
              return r
          self._successful_visits.remove(id(node))
          if len(data) > 1:
              # Several derivations: wrap each in '_inter' under one '_iambig'.
              children = [self.tree_class('_inter', c) for c in data]
              return self.tree_class('_iambig', children)
          return data[0]

      def transform_packed_node(self, node, data):
          r = self._check_cycle(node)
          if r is Discard:
              return r
          # When resolving ambiguity, only the first successful derivation
          # of a parent is kept; later ones are discarded.
          if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
              return Discard
          if self._use_cache and id(node) in self._cache:
              return self._cache[id(node)]
          children = []
          assert len(data) <= 2
          data = PackedData(node, data)
          if data.left is not PackedData.NO_DATA:
              if node.left.is_intermediate and isinstance(data.left, list):
                  # An intermediate left child contributes a list; flatten it.
                  children += data.left
              else:
                  children.append(data.left)
          if data.right is not PackedData.NO_DATA:
              children.append(data.right)
          # Children of intermediate parents stay as a plain list; only
          # completed symbols go through the rule callback.
          transformed = children if node.parent.is_intermediate else self._call_rule_func(node, children)
          if self._use_cache:
              self._cache[id(node)] = transformed
          return transformed

      def visit_symbol_node_in(self, node):
          super(ForestToParseTree, self).visit_symbol_node_in(node)
          # While retreating from a cycle, do not descend any further.
          if self._on_cycle_retreat:
              return None
          return node.children

      def visit_packed_node_in(self, node):
          self._on_cycle_retreat = False
          to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
          # Skip the children if the parent is already resolved, or if this
          # node's result is already cached.
          if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
              if not self._use_cache or id(node) not in self._cache:
                  return to_visit
          return None

      def visit_packed_node_out(self, node):
          super(ForestToParseTree, self).visit_packed_node_out(node)
          if not self._on_cycle_retreat:
              self._successful_visits.add(id(node.parent))
  def handles_ambiguity(func):
      """Decorator for methods of subclasses of ``TreeForestTransformer``.
      Denotes that the method should receive a list of transformed derivations.

      The function is marked by setting a ``handles_ambiguity`` attribute on
      it and is returned unchanged.
      """
      func.handles_ambiguity = True
      return func
  class TreeForestTransformer(ForestToParseTree):
      """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
      By default, it will construct a tree.

      Methods provided via inheritance are called based on the rule/symbol
      names of nodes in the forest.

      Methods that act on rules will receive a list of the results of the
      transformations of the rule's children. By default, trees and tokens.

      Methods that act on tokens will receive a token.

      Alternatively, methods that act on rules may be annotated with
      ``handles_ambiguity``. In this case, the function will receive a list
      of all the transformations of all the derivations of the rule.
      By default, a list of trees where each tree.data is equal to the
      rule name or one of its aliases.

      Non-tree transformations are made possible by override of
      ``__default__``, ``__default_token__``, and ``__default_ambig__``.

      Note:
          Tree shaping features such as inlined rules and token filtering are
          not built into the transformation. Positions are also not propagated.

      Parameters:
          tree_class: The tree class to use for construction
          prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
          resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
          use_cache (bool): If True, caches the results of some transformations,
              potentially improving performance when ``resolve_ambiguity==False``.
              Only use if you know what you are doing: i.e. All transformation
              functions are pure and referentially transparent.
      """

      def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
          # Rule callbacks are looked up as methods, so the callbacks dict stays empty.
          super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)

      def __default__(self, name, data):
          """Default operation on tree (for override).

          Returns a tree with name with data as children.
          """
          return self.tree_class(name, data)

      def __default_ambig__(self, name, data):
          """Default operation on ambiguous rule (for override).

          Wraps data in an '_ambig' node if it contains more than
          one element.
          """
          if len(data) > 1:
              return self.tree_class('_ambig', data)
          elif data:
              return data[0]
          return Discard

      def __default_token__(self, node):
          """Default operation on ``Token`` (for override).

          Returns ``node``.
          """
          return node

      def transform_token_node(self, node):
          # Dispatch on the token's type name, falling back to the default.
          return getattr(self, node.type, self.__default_token__)(node)

      def _call_rule_func(self, node, data):
          name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
          user_func = getattr(self, name, self.__default__)
          # Methods marked ``handles_ambiguity`` are for whole symbols, not for
          # single derivations, so the default is used for them here.
          if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
              user_func = partial(self.__default__, name)
          if not self.resolve_ambiguity:
              wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
              user_func = wrapper(user_func)
          return user_func(data)

      def _call_ambig_func(self, node, data):
          name = node.s.name
          user_func = getattr(self, name, self.__default_ambig__)
          # Only methods marked ``handles_ambiguity`` receive the full list of
          # derivations; anything else falls back to the default.
          if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
              user_func = partial(self.__default_ambig__, name)
          return user_func(data)
  class ForestToPyDotVisitor(ForestVisitor):
      """
      A Forest visitor which writes the SPPF to a PNG.

      The SPPF can get really large, really quickly because
      of the amount of meta-data it stores, so this is probably
      only useful for trivial trees and learning how the SPPF
      is structured.

      Parameters:
          rankdir: Passed to ``pydot.Dot`` as the graph's ``rankdir``.
      """

      def __init__(self, rankdir="TB"):
          super(ForestToPyDotVisitor, self).__init__(single_visit=True)
          # pydot is imported on construction, so it is only needed when this class is used.
          self.pydot = import_module('pydot')
          self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)

      def visit(self, root, filename):
          """Walk the forest from ``root`` and write the graph to ``filename`` as a PNG."""
          super(ForestToPyDotVisitor, self).visit(root)
          try:
              self.graph.write_png(filename)
          except FileNotFoundError as e:
              # The message needs a %s placeholder for the exception argument.
              logger.error("Could not write png: %s", e)

      def visit_token_node(self, node):
          # ``node`` is the token itself; graph ids are based on id(token).
          graph_node_id = str(id(node))
          graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
          graph_node_color = 0x808080
          graph_node_style = "\"filled,rounded\""
          graph_node_shape = "diamond"
          graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
          self.graph.add_node(graph_node)

      def visit_packed_node_in(self, node):
          graph_node_id = str(id(node))
          graph_node_label = repr(node)
          graph_node_color = 0x808080
          graph_node_style = "filled"
          graph_node_shape = "diamond"
          graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
          self.graph.add_node(graph_node)
          yield node.left
          yield node.right

      def visit_packed_node_out(self, node):
          graph_node_id = str(id(node))
          graph_node = self.graph.get_node(graph_node_id)[0]
          for child in [node.left, node.right]:
              if child is not None:
                  # Token nodes were registered under the id of their token.
                  child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
                  child_graph_node = self.graph.get_node(child_graph_node_id)[0]
                  self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
              else:
                  # A missing child gets an invisible placeholder node and edge.
                  # Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
                  child_graph_node_id = str(randint(100000000000000000000000000000, 123456789012345678901234567890))
                  child_graph_node_style = "invis"
                  child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
                  child_edge_style = "invis"
                  self.graph.add_node(child_graph_node)
                  self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))

      def visit_symbol_node_in(self, node):
          graph_node_id = str(id(node))
          graph_node_label = repr(node)
          graph_node_color = 0x808080
          graph_node_style = "\"filled\""
          if node.is_intermediate:
              graph_node_shape = "ellipse"
          else:
              graph_node_shape = "rectangle"
          graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
          self.graph.add_node(graph_node)
          return iter(node.children)

      def visit_symbol_node_out(self, node):
          graph_node_id = str(id(node))
          graph_node = self.graph.get_node(graph_node_id)[0]
          for child in node.children:
              child_graph_node_id = str(id(child))
              child_graph_node = self.graph.get_node(child_graph_node_id)[0]
              self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
  668. for child in node.children:
  669. child_graph_node_id = str(id(child))
  670. child_graph_node = self.graph.get_node(child_graph_node_id)[0]
  671. self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))