tree.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. from abc import abstractmethod, abstractproperty
  2. from typing import List, Optional, Tuple, Union
  3. from parso.utils import split_lines
  4. def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]':
  5. """
  6. Recursively looks at the parents of a node and returns the first found node
  7. that matches ``node_types``. Returns ``None`` if no matching node is found.
  8. This function is deprecated, use :meth:`NodeOrLeaf.search_ancestor` instead.
  9. :param node: The ancestors of this node will be checked.
  10. :param node_types: type names that are searched for.
  11. """
  12. return node.search_ancestor(*node_types)
  13. class NodeOrLeaf:
  14. """
  15. The base class for nodes and leaves.
  16. """
  17. __slots__ = ('parent',)
  18. type: str
  19. '''
  20. The type is a string that typically matches the types of the grammar file.
  21. '''
  22. parent: 'Optional[BaseNode]'
  23. '''
  24. The parent :class:`BaseNode` of this node or leaf.
  25. None if this is the root node.
  26. '''
  27. def get_root_node(self):
  28. """
  29. Returns the root node of a parser tree. The returned node doesn't have
  30. a parent node like all the other nodes/leaves.
  31. """
  32. scope = self
  33. while scope.parent is not None:
  34. scope = scope.parent
  35. return scope
  36. def get_next_sibling(self):
  37. """
  38. Returns the node immediately following this node in this parent's
  39. children list. If this node does not have a next sibling, it is None
  40. """
  41. parent = self.parent
  42. if parent is None:
  43. return None
  44. # Can't use index(); we need to test by identity
  45. for i, child in enumerate(parent.children):
  46. if child is self:
  47. try:
  48. return self.parent.children[i + 1]
  49. except IndexError:
  50. return None
  51. def get_previous_sibling(self):
  52. """
  53. Returns the node immediately preceding this node in this parent's
  54. children list. If this node does not have a previous sibling, it is
  55. None.
  56. """
  57. parent = self.parent
  58. if parent is None:
  59. return None
  60. # Can't use index(); we need to test by identity
  61. for i, child in enumerate(parent.children):
  62. if child is self:
  63. if i == 0:
  64. return None
  65. return self.parent.children[i - 1]
  66. def get_previous_leaf(self):
  67. """
  68. Returns the previous leaf in the parser tree.
  69. Returns `None` if this is the first element in the parser tree.
  70. """
  71. if self.parent is None:
  72. return None
  73. node = self
  74. while True:
  75. c = node.parent.children
  76. i = c.index(node)
  77. if i == 0:
  78. node = node.parent
  79. if node.parent is None:
  80. return None
  81. else:
  82. node = c[i - 1]
  83. break
  84. while True:
  85. try:
  86. node = node.children[-1]
  87. except AttributeError: # A Leaf doesn't have children.
  88. return node
  89. def get_next_leaf(self):
  90. """
  91. Returns the next leaf in the parser tree.
  92. Returns None if this is the last element in the parser tree.
  93. """
  94. if self.parent is None:
  95. return None
  96. node = self
  97. while True:
  98. c = node.parent.children
  99. i = c.index(node)
  100. if i == len(c) - 1:
  101. node = node.parent
  102. if node.parent is None:
  103. return None
  104. else:
  105. node = c[i + 1]
  106. break
  107. while True:
  108. try:
  109. node = node.children[0]
  110. except AttributeError: # A Leaf doesn't have children.
  111. return node
  112. @abstractproperty
  113. def start_pos(self) -> Tuple[int, int]:
  114. """
  115. Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`.
  116. :return tuple of int: (line, column)
  117. """
  118. @abstractproperty
  119. def end_pos(self) -> Tuple[int, int]:
  120. """
  121. Returns the end position of the prefix as a tuple, e.g. `(3, 4)`.
  122. :return tuple of int: (line, column)
  123. """
  124. @abstractmethod
  125. def get_start_pos_of_prefix(self):
  126. """
  127. Returns the start_pos of the prefix. This means basically it returns
  128. the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the
  129. prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is
  130. `(1, 2)`.
  131. :return tuple of int: (line, column)
  132. """
  133. @abstractmethod
  134. def get_first_leaf(self):
  135. """
  136. Returns the first leaf of a node or itself if this is a leaf.
  137. """
  138. @abstractmethod
  139. def get_last_leaf(self):
  140. """
  141. Returns the last leaf of a node or itself if this is a leaf.
  142. """
  143. @abstractmethod
  144. def get_code(self, include_prefix=True):
  145. """
  146. Returns the code that was the input for the parser for this node.
  147. :param include_prefix: Removes the prefix (whitespace and comments) of
  148. e.g. a statement.
  149. """
  150. def search_ancestor(self, *node_types: str) -> 'Optional[BaseNode]':
  151. """
  152. Recursively looks at the parents of this node or leaf and returns the
  153. first found node that matches ``node_types``. Returns ``None`` if no
  154. matching node is found.
  155. :param node_types: type names that are searched for.
  156. """
  157. node = self.parent
  158. while node is not None:
  159. if node.type in node_types:
  160. return node
  161. node = node.parent
  162. return None
  163. def dump(self, *, indent: Optional[Union[int, str]] = 4) -> str:
  164. """
  165. Returns a formatted dump of the parser tree rooted at this node or leaf. This is
  166. mainly useful for debugging purposes.
  167. The ``indent`` parameter is interpreted in a similar way as :py:func:`ast.dump`.
  168. If ``indent`` is a non-negative integer or string, then the tree will be
  169. pretty-printed with that indent level. An indent level of 0, negative, or ``""``
  170. will only insert newlines. ``None`` selects the single line representation.
  171. Using a positive integer indent indents that many spaces per level. If
  172. ``indent`` is a string (such as ``"\\t"``), that string is used to indent each
  173. level.
  174. :param indent: Indentation style as described above. The default indentation is
  175. 4 spaces, which yields a pretty-printed dump.
  176. >>> import parso
  177. >>> print(parso.parse("lambda x, y: x + y").dump())
  178. Module([
  179. Lambda([
  180. Keyword('lambda', (1, 0)),
  181. Param([
  182. Name('x', (1, 7), prefix=' '),
  183. Operator(',', (1, 8)),
  184. ]),
  185. Param([
  186. Name('y', (1, 10), prefix=' '),
  187. ]),
  188. Operator(':', (1, 11)),
  189. PythonNode('arith_expr', [
  190. Name('x', (1, 13), prefix=' '),
  191. Operator('+', (1, 15), prefix=' '),
  192. Name('y', (1, 17), prefix=' '),
  193. ]),
  194. ]),
  195. EndMarker('', (1, 18)),
  196. ])
  197. """
  198. if indent is None:
  199. newline = False
  200. indent_string = ''
  201. elif isinstance(indent, int):
  202. newline = True
  203. indent_string = ' ' * indent
  204. elif isinstance(indent, str):
  205. newline = True
  206. indent_string = indent
  207. else:
  208. raise TypeError(f"expect 'indent' to be int, str or None, got {indent!r}")
  209. def _format_dump(node: NodeOrLeaf, indent: str = '', top_level: bool = True) -> str:
  210. result = ''
  211. node_type = type(node).__name__
  212. if isinstance(node, Leaf):
  213. result += f'{indent}{node_type}('
  214. if isinstance(node, ErrorLeaf):
  215. result += f'{node.token_type!r}, '
  216. elif isinstance(node, TypedLeaf):
  217. result += f'{node.type!r}, '
  218. result += f'{node.value!r}, {node.start_pos!r}'
  219. if node.prefix:
  220. result += f', prefix={node.prefix!r}'
  221. result += ')'
  222. elif isinstance(node, BaseNode):
  223. result += f'{indent}{node_type}('
  224. if isinstance(node, Node):
  225. result += f'{node.type!r}, '
  226. result += '['
  227. if newline:
  228. result += '\n'
  229. for child in node.children:
  230. result += _format_dump(child, indent=indent + indent_string, top_level=False)
  231. result += f'{indent}])'
  232. else: # pragma: no cover
  233. # We shouldn't ever reach here, unless:
  234. # - `NodeOrLeaf` is incorrectly subclassed else where
  235. # - or a node's children list contains invalid nodes or leafs
  236. # Both are unexpected internal errors.
  237. raise TypeError(f'unsupported node encountered: {node!r}')
  238. if not top_level:
  239. if newline:
  240. result += ',\n'
  241. else:
  242. result += ', '
  243. return result
  244. return _format_dump(self)
  245. class Leaf(NodeOrLeaf):
  246. '''
  247. Leafs are basically tokens with a better API. Leafs exactly know where they
  248. were defined and what text preceeds them.
  249. '''
  250. __slots__ = ('value', 'line', 'column', 'prefix')
  251. prefix: str
  252. def __init__(self, value: str, start_pos: Tuple[int, int], prefix: str = '') -> None:
  253. self.value = value
  254. '''
  255. :py:func:`str` The value of the current token.
  256. '''
  257. self.start_pos = start_pos
  258. self.prefix = prefix
  259. '''
  260. :py:func:`str` Typically a mixture of whitespace and comments. Stuff
  261. that is syntactically irrelevant for the syntax tree.
  262. '''
  263. self.parent: Optional[BaseNode] = None
  264. '''
  265. The parent :class:`BaseNode` of this leaf.
  266. '''
  267. @property
  268. def start_pos(self) -> Tuple[int, int]:
  269. return self.line, self.column
  270. @start_pos.setter
  271. def start_pos(self, value: Tuple[int, int]) -> None:
  272. self.line = value[0]
  273. self.column = value[1]
  274. def get_start_pos_of_prefix(self):
  275. previous_leaf = self.get_previous_leaf()
  276. if previous_leaf is None:
  277. lines = split_lines(self.prefix)
  278. # + 1 is needed because split_lines always returns at least [''].
  279. return self.line - len(lines) + 1, 0 # It's the first leaf.
  280. return previous_leaf.end_pos
  281. def get_first_leaf(self):
  282. return self
  283. def get_last_leaf(self):
  284. return self
  285. def get_code(self, include_prefix=True):
  286. if include_prefix:
  287. return self.prefix + self.value
  288. else:
  289. return self.value
  290. @property
  291. def end_pos(self) -> Tuple[int, int]:
  292. lines = split_lines(self.value)
  293. end_pos_line = self.line + len(lines) - 1
  294. # Check for multiline token
  295. if self.line == end_pos_line:
  296. end_pos_column = self.column + len(lines[-1])
  297. else:
  298. end_pos_column = len(lines[-1])
  299. return end_pos_line, end_pos_column
  300. def __repr__(self):
  301. value = self.value
  302. if not value:
  303. value = self.type
  304. return "<%s: %s>" % (type(self).__name__, value)
  305. class TypedLeaf(Leaf):
  306. __slots__ = ('type',)
  307. def __init__(self, type, value, start_pos, prefix=''):
  308. super().__init__(value, start_pos, prefix)
  309. self.type = type
  310. class BaseNode(NodeOrLeaf):
  311. """
  312. The super class for all nodes.
  313. A node has children, a type and possibly a parent node.
  314. """
  315. __slots__ = ('children',)
  316. def __init__(self, children) -> None:
  317. self.children = children
  318. """
  319. A list of :class:`NodeOrLeaf` child nodes.
  320. """
  321. self.parent: Optional[BaseNode] = None
  322. '''
  323. The parent :class:`BaseNode` of this node.
  324. None if this is the root node.
  325. '''
  326. for child in children:
  327. child.parent = self
  328. @property
  329. def start_pos(self) -> Tuple[int, int]:
  330. return self.children[0].start_pos
  331. def get_start_pos_of_prefix(self):
  332. return self.children[0].get_start_pos_of_prefix()
  333. @property
  334. def end_pos(self) -> Tuple[int, int]:
  335. return self.children[-1].end_pos
  336. def _get_code_for_children(self, children, include_prefix):
  337. if include_prefix:
  338. return "".join(c.get_code() for c in children)
  339. else:
  340. first = children[0].get_code(include_prefix=False)
  341. return first + "".join(c.get_code() for c in children[1:])
  342. def get_code(self, include_prefix=True):
  343. return self._get_code_for_children(self.children, include_prefix)
  344. def get_leaf_for_position(self, position, include_prefixes=False):
  345. """
  346. Get the :py:class:`parso.tree.Leaf` at ``position``
  347. :param tuple position: A position tuple, row, column. Rows start from 1
  348. :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls
  349. on whitespace or comments before a leaf
  350. :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None``
  351. """
  352. def binary_search(lower, upper):
  353. if lower == upper:
  354. element = self.children[lower]
  355. if not include_prefixes and position < element.start_pos:
  356. # We're on a prefix.
  357. return None
  358. # In case we have prefixes, a leaf always matches
  359. try:
  360. return element.get_leaf_for_position(position, include_prefixes)
  361. except AttributeError:
  362. return element
  363. index = int((lower + upper) / 2)
  364. element = self.children[index]
  365. if position <= element.end_pos:
  366. return binary_search(lower, index)
  367. else:
  368. return binary_search(index + 1, upper)
  369. if not ((1, 0) <= position <= self.children[-1].end_pos):
  370. raise ValueError('Please provide a position that exists within this node.')
  371. return binary_search(0, len(self.children) - 1)
  372. def get_first_leaf(self):
  373. return self.children[0].get_first_leaf()
  374. def get_last_leaf(self):
  375. return self.children[-1].get_last_leaf()
  376. def __repr__(self):
  377. code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
  378. return "<%s: %s@%s,%s>" % \
  379. (type(self).__name__, code, self.start_pos[0], self.start_pos[1])
  380. class Node(BaseNode):
  381. """Concrete implementation for interior nodes."""
  382. __slots__ = ('type',)
  383. def __init__(self, type, children):
  384. super().__init__(children)
  385. self.type = type
  386. def __repr__(self):
  387. return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children)
  388. class ErrorNode(BaseNode):
  389. """
  390. A node that contains valid nodes/leaves that we're follow by a token that
  391. was invalid. This basically means that the leaf after this node is where
  392. Python would mark a syntax error.
  393. """
  394. __slots__ = ()
  395. type = 'error_node'
  396. class ErrorLeaf(Leaf):
  397. """
  398. A leaf that is either completely invalid in a language (like `$` in Python)
  399. or is invalid at that position. Like the star in `1 +* 1`.
  400. """
  401. __slots__ = ('token_type',)
  402. type = 'error_leaf'
  403. def __init__(self, token_type, value, start_pos, prefix=''):
  404. super().__init__(value, start_pos, prefix)
  405. self.token_type = token_type
  406. def __repr__(self):
  407. return "<%s: %s:%s, %s>" % \
  408. (type(self).__name__, self.token_type, repr(self.value), self.start_pos)