| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- import sys
- from copy import deepcopy
- from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING
- from .lexer import Token
- if TYPE_CHECKING:
- from .lexer import TerminalDef
- try:
- import rich
- except ImportError:
- pass
- from typing import Literal
- ###{standalone
- class Meta:
- empty: bool
- line: int
- column: int
- start_pos: int
- end_line: int
- end_column: int
- end_pos: int
- orig_expansion: 'List[TerminalDef]'
- match_tree: bool
- def __init__(self):
- self.empty = True
- _Leaf_T = TypeVar("_Leaf_T")
- Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
- class Tree(Generic[_Leaf_T]):
- """The main tree class.
- Creates a new tree, and stores "data" and "children" in attributes of the same name.
- Trees can be hashed and compared.
- Parameters:
- data: The name of the rule or alias
- children: List of matched sub-rules and terminals
- meta: Line & Column numbers (if ``propagate_positions`` is enabled).
- meta attributes: (line, column, end_line, end_column, start_pos, end_pos,
- container_line, container_column, container_end_line, container_end_column)
- container_* attributes consider all symbols, including those that have been inlined in the tree.
- For example, in the rule 'a: _A B _C', the regular attributes will mark the start and end of B,
- but the container_* attributes will also include _A and _C in the range. However, rules that
- contain 'a' will consider it in full, including _A and _C for all attributes.
- """
- data: str
- children: 'List[Branch[_Leaf_T]]'
- def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
- self.data = data
- self.children = children
- self._meta = meta
- @property
- def meta(self) -> Meta:
- if self._meta is None:
- self._meta = Meta()
- return self._meta
- def __repr__(self):
- return 'Tree(%r, %r)' % (self.data, self.children)
- __match_args__ = ("data", "children")
- def _pretty_label(self):
- return self.data
- def _pretty(self, level, indent_str):
- yield f'{indent_str*level}{self._pretty_label()}'
- if len(self.children) == 1 and not isinstance(self.children[0], Tree):
- yield f'\t{self.children[0]}\n'
- else:
- yield '\n'
- for n in self.children:
- if isinstance(n, Tree):
- yield from n._pretty(level+1, indent_str)
- else:
- yield f'{indent_str*(level+1)}{n}\n'
- def pretty(self, indent_str: str=' ') -> str:
- """Returns an indented string representation of the tree.
- Great for debugging.
- """
- return ''.join(self._pretty(0, indent_str))
- def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
- """Returns a tree widget for the 'rich' library.
- Example:
- ::
- from rich import print
- from lark import Tree
- tree = Tree('root', ['node1', 'node2'])
- print(tree)
- """
- return self._rich(parent)
- def _rich(self, parent):
- if parent:
- tree = parent.add(f'[bold]{self.data}[/bold]')
- else:
- import rich.tree
- tree = rich.tree.Tree(self.data)
- for c in self.children:
- if isinstance(c, Tree):
- c._rich(tree)
- else:
- tree.add(f'[green]{c}[/green]')
- return tree
- def __eq__(self, other):
- try:
- return self.data == other.data and self.children == other.children
- except AttributeError:
- return False
- def __ne__(self, other):
- return not (self == other)
- def __hash__(self) -> int:
- return hash((self.data, tuple(self.children)))
- def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
- """Depth-first iteration.
- Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
- """
- queue = [self]
- subtrees = dict()
- for subtree in queue:
- subtrees[id(subtree)] = subtree
- queue += [c for c in reversed(subtree.children)
- if isinstance(c, Tree) and id(c) not in subtrees]
- del queue
- return reversed(list(subtrees.values()))
- def iter_subtrees_topdown(self):
- """Breadth-first iteration.
- Iterates over all the subtrees, return nodes in order like pretty() does.
- """
- stack = [self]
- stack_append = stack.append
- stack_pop = stack.pop
- while stack:
- node = stack_pop()
- if not isinstance(node, Tree):
- continue
- yield node
- for child in reversed(node.children):
- stack_append(child)
- def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
- """Returns all nodes of the tree that evaluate pred(node) as true."""
- return filter(pred, self.iter_subtrees())
- def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
- """Returns all nodes of the tree whose data equals the given data."""
- return self.find_pred(lambda t: t.data == data)
- ###}
- def find_token(self, token_type: str) -> Iterator[_Leaf_T]:
- """Returns all tokens whose type equals the given token_type.
- This is a recursive function that will find tokens in all the subtrees.
- Example:
- >>> term_tokens = tree.find_token('TERM')
- """
- return self.scan_values(lambda v: isinstance(v, Token) and v.type == token_type)
- def expand_kids_by_data(self, *data_values):
- """Expand (inline) children with any of the given data values. Returns True if anything changed"""
- changed = False
- for i in range(len(self.children)-1, -1, -1):
- child = self.children[i]
- if isinstance(child, Tree) and child.data in data_values:
- self.children[i:i+1] = child.children
- changed = True
- return changed
- def scan_values(self, pred: 'Callable[[Branch[_Leaf_T]], bool]') -> Iterator[_Leaf_T]:
- """Return all values in the tree that evaluate pred(value) as true.
- This can be used to find all the tokens in the tree.
- Example:
- >>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token))
- """
- for c in self.children:
- if isinstance(c, Tree):
- for t in c.scan_values(pred):
- yield t
- else:
- if pred(c):
- yield c
- def __deepcopy__(self, memo):
- return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
- def copy(self) -> 'Tree[_Leaf_T]':
- return type(self)(self.data, self.children)
- def set(self, data: str, children: 'List[Branch[_Leaf_T]]') -> None:
- self.data = data
- self.children = children
- ParseTree = Tree['Token']
- class SlottedTree(Tree):
- __slots__ = 'data', 'children', 'rule', '_meta'
- def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
- graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
- graph.write_png(filename)
- def pydot__tree_to_dot(tree: Tree, filename, rankdir="LR", **kwargs):
- graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
- graph.write(filename)
- def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
- """Creates a colorful image that represents the tree (data+children, without meta)
- Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to
- directed graphs drawn from top to bottom, from left to right, from bottom to
- top, and from right to left, respectively.
- `kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of
- possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
- """
- import pydot # type: ignore[import-not-found]
- graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
- i = [0]
- def new_leaf(leaf):
- node = pydot.Node(i[0], label=repr(leaf))
- i[0] += 1
- graph.add_node(node)
- return node
- def _to_pydot(subtree):
- color = hash(subtree.data) & 0xffffff
- color |= 0x808080
- subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
- for child in subtree.children]
- node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
- i[0] += 1
- graph.add_node(node)
- for subnode in subnodes:
- graph.add_edge(pydot.Edge(node, subnode))
- return node
- _to_pydot(tree)
- return graph
|