| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469 |
- # Copyright 2016 Grist Labs, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import ast
- import numbers
- import sys
- import token
- from ast import Module
- from typing import Callable, List, Union, cast, Optional, Tuple, TYPE_CHECKING
- from . import util
- from .asttokens import ASTTokens
- from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer
- if TYPE_CHECKING:
- from .util import AstNode
- # Mapping of matching braces. To find a token here, look up token[:2].
- _matching_pairs_left = {
- (token.OP, '('): (token.OP, ')'),
- (token.OP, '['): (token.OP, ']'),
- (token.OP, '{'): (token.OP, '}'),
- }
- _matching_pairs_right = {
- (token.OP, ')'): (token.OP, '('),
- (token.OP, ']'): (token.OP, '['),
- (token.OP, '}'): (token.OP, '{'),
- }
- class MarkTokens:
- """
- Helper that visits all nodes in the AST tree and assigns .first_token and .last_token attributes
- to each of them. This is the heart of the token-marking logic.
- """
- def __init__(self, code):
- # type: (ASTTokens) -> None
- self._code = code
- self._methods = util.NodeMethods()
- self._iter_children = None # type: Optional[Callable]
- def visit_tree(self, node):
- # type: (Module) -> None
- self._iter_children = util.iter_children_func(node)
- util.visit_tree(node, self._visit_before_children, self._visit_after_children)
- def _visit_before_children(self, node, parent_token):
- # type: (AstNode, Optional[util.Token]) -> Tuple[Optional[util.Token], Optional[util.Token]]
- col = getattr(node, 'col_offset', None)
- token = self._code.get_token_from_utf8(node.lineno, col) if col is not None else None
- if not token and util.is_module(node):
- # We'll assume that a Module node starts at the start of the source code.
- token = self._code.get_token(1, 0)
- # Use our own token, or our parent's if we don't have one, to pass to child calls as
- # parent_token argument. The second value becomes the token argument of _visit_after_children.
- return (token or parent_token, token)
- def _visit_after_children(self, node, parent_token, token):
- # type: (AstNode, Optional[util.Token], Optional[util.Token]) -> None
- # This processes the node generically first, after all children have been processed.
- # Get the first and last tokens that belong to children. Note how this doesn't assume that we
- # iterate through children in order that corresponds to occurrence in source code. This
- # assumption can fail (e.g. with return annotations).
- first = token
- last = None
- for child in cast(Callable, self._iter_children)(node):
- # astroid slices have especially wrong positions, we don't want them to corrupt their parents.
- if util.is_empty_astroid_slice(child):
- continue
- if not first or child.first_token.index < first.index:
- first = child.first_token
- if not last or child.last_token.index > last.index:
- last = child.last_token
- # If we don't have a first token from _visit_before_children, and there were no children, then
- # use the parent's token as the first token.
- first = first or parent_token
- # If no children, set last token to the first one.
- last = last or first
- # Statements continue to before NEWLINE. This helps cover a few different cases at once.
- if util.is_stmt(node):
- last = self._find_last_in_stmt(cast(util.Token, last))
- # Capture any unmatched brackets.
- first, last = self._expand_to_matching_pairs(cast(util.Token, first), cast(util.Token, last), node)
- # Give a chance to node-specific methods to adjust.
- nfirst, nlast = self._methods.get(self, node.__class__)(node, first, last)
- if (nfirst, nlast) != (first, last):
- # If anything changed, expand again to capture any unmatched brackets.
- nfirst, nlast = self._expand_to_matching_pairs(nfirst, nlast, node)
- node.first_token = nfirst
- node.last_token = nlast
- def _find_last_in_stmt(self, start_token):
- # type: (util.Token) -> util.Token
- t = start_token
- while (not util.match_token(t, token.NEWLINE) and
- not util.match_token(t, token.OP, ';') and
- not token.ISEOF(t.type)):
- t = self._code.next_token(t, include_extra=True)
- return self._code.prev_token(t)
- def _expand_to_matching_pairs(self, first_token, last_token, node):
- # type: (util.Token, util.Token, AstNode) -> Tuple[util.Token, util.Token]
- """
- Scan tokens in [first_token, last_token] range that are between node's children, and for any
- unmatched brackets, adjust first/last tokens to include the closing pair.
- """
- # We look for opening parens/braces among non-child tokens (i.e. tokens between our actual
- # child nodes). If we find any closing ones, we match them to the opens.
- to_match_right = [] # type: List[Tuple[int, str]]
- to_match_left = []
- for tok in self._code.token_range(first_token, last_token):
- tok_info = tok[:2]
- if to_match_right and tok_info == to_match_right[-1]:
- to_match_right.pop()
- elif tok_info in _matching_pairs_left:
- to_match_right.append(_matching_pairs_left[tok_info])
- elif tok_info in _matching_pairs_right:
- to_match_left.append(_matching_pairs_right[tok_info])
- # Once done, extend `last_token` to match any unclosed parens/braces.
- for match in reversed(to_match_right):
- last = self._code.next_token(last_token)
- # Allow for trailing commas or colons (allowed in subscripts) before the closing delimiter
- while any(util.match_token(last, token.OP, x) for x in (',', ':')):
- last = self._code.next_token(last)
- # Now check for the actual closing delimiter.
- if util.match_token(last, *match):
- last_token = last
- # And extend `first_token` to match any unclosed opening parens/braces.
- for match in to_match_left:
- first = self._code.prev_token(first_token)
- if util.match_token(first, *match):
- first_token = first
- return (first_token, last_token)
- #----------------------------------------------------------------------
- # Node visitors. Each takes a preliminary first and last tokens, and returns the adjusted pair
- # that will actually be assigned.
- def visit_default(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # pylint: disable=no-self-use
- # By default, we don't need to adjust the token we computed earlier.
- return (first_token, last_token)
- def handle_comp(self, open_brace, node, first_token, last_token):
- # type: (str, AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # For list/set/dict comprehensions, we only get the token of the first child, so adjust it to
- # include the opening brace (the closing brace will be matched automatically).
- before = self._code.prev_token(first_token)
- util.expect_token(before, token.OP, open_brace)
- return (before, last_token)
- def visit_comprehension(self,
- node, # type: AstNode
- first_token, # type: util.Token
- last_token, # type: util.Token
- ):
- # type: (...) -> Tuple[util.Token, util.Token]
- # The 'comprehension' node starts with 'for' but we only get first child; we search backwards
- # to find the 'for' keyword.
- first = self._code.find_token(first_token, token.NAME, 'for', reverse=True)
- return (first, last_token)
- def visit_if(self, node, first_token, last_token):
- # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- while first_token.string not in ('if', 'elif'):
- first_token = self._code.prev_token(first_token)
- return first_token, last_token
- def handle_attr(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # Attribute node has ".attr" (2 tokens) after the last child.
- dot = self._code.find_token(last_token, token.OP, '.')
- name = self._code.next_token(dot)
- util.expect_token(name, token.NAME)
- return (first_token, name)
- visit_attribute = handle_attr
- visit_assignattr = handle_attr
- visit_delattr = handle_attr
- def handle_def(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # With astroid, nodes that start with a doc-string can have an empty body, in which case we
- # need to adjust the last token to include the doc string.
- if not node.body and (getattr(node, 'doc_node', None) or getattr(node, 'doc', None)): # type: ignore[union-attr]
- last_token = self._code.find_token(last_token, token.STRING)
- # Include @ from decorator
- if first_token.index > 0:
- prev = self._code.prev_token(first_token)
- if util.match_token(prev, token.OP, '@'):
- first_token = prev
- return (first_token, last_token)
- visit_classdef = handle_def
- visit_functiondef = handle_def
- def handle_following_brackets(self, node, last_token, opening_bracket):
- # type: (AstNode, util.Token, str) -> util.Token
- # This is for calls and subscripts, which have a pair of brackets
- # at the end which may contain no nodes, e.g. foo() or bar[:].
- # We look for the opening bracket and then let the matching pair be found automatically
- # Remember that last_token is at the end of all children,
- # so we are not worried about encountering a bracket that belongs to a child.
- first_child = next(cast(Callable, self._iter_children)(node))
- call_start = self._code.find_token(first_child.last_token, token.OP, opening_bracket)
- if call_start.index > last_token.index:
- last_token = call_start
- return last_token
- def visit_call(self, node, first_token, last_token):
- # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- last_token = self.handle_following_brackets(node, last_token, '(')
- # Handling a python bug with decorators with empty parens, e.g.
- # @deco()
- # def ...
- if util.match_token(first_token, token.OP, '@'):
- first_token = self._code.next_token(first_token)
- return (first_token, last_token)
- def visit_matchclass(self, node, first_token, last_token):
- # type: (util.Token, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- last_token = self.handle_following_brackets(node, last_token, '(')
- return (first_token, last_token)
- def visit_subscript(self,
- node, # type: AstNode
- first_token, # type: util.Token
- last_token, # type: util.Token
- ):
- # type: (...) -> Tuple[util.Token, util.Token]
- last_token = self.handle_following_brackets(node, last_token, '[')
- return (first_token, last_token)
- def visit_slice(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # consume `:` tokens to the left and right. In Python 3.9, Slice nodes are
- # given a col_offset, (and end_col_offset), so this will always start inside
- # the slice, even if it is the empty slice. However, in 3.8 and below, this
- # will only expand to the full slice if the slice contains a node with a
- # col_offset. So x[:] will only get the correct tokens in 3.9, but x[1:] and
- # x[:1] will even on earlier versions of Python.
- while True:
- prev = self._code.prev_token(first_token)
- if prev.string != ':':
- break
- first_token = prev
- while True:
- next_ = self._code.next_token(last_token)
- if next_.string != ':':
- break
- last_token = next_
- return (first_token, last_token)
- def handle_bare_tuple(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # A bare tuple doesn't include parens; if there is a trailing comma, make it part of the tuple.
- maybe_comma = self._code.next_token(last_token)
- if util.match_token(maybe_comma, token.OP, ','):
- last_token = maybe_comma
- return (first_token, last_token)
- # In Python3.8 parsed tuples include parentheses when present.
- def handle_tuple_nonempty(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
- # It's a bare tuple if the first token belongs to the first child. The first child may
- # include extraneous parentheses (which don't create new nodes), so account for those too.
- child = node.elts[0]
- if TYPE_CHECKING:
- child = cast(AstNode, child)
- child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True)
- if first_token == child_first:
- return self.handle_bare_tuple(node, first_token, last_token)
- return (first_token, last_token)
- def visit_tuple(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer)
- if not node.elts:
- # An empty tuple is just "()", and we need no further info.
- return (first_token, last_token)
- return self.handle_tuple_nonempty(node, first_token, last_token)
- def _gobble_parens(self, first_token, last_token, include_all=False):
- # type: (util.Token, util.Token, bool) -> Tuple[util.Token, util.Token]
- # Expands a range of tokens to include one or all pairs of surrounding parentheses, and
- # returns (first, last) tokens that include these parens.
- while first_token.index > 0:
- prev = self._code.prev_token(first_token)
- next = self._code.next_token(last_token)
- if util.match_token(prev, token.OP, '(') and util.match_token(next, token.OP, ')'):
- first_token, last_token = prev, next
- if include_all:
- continue
- break
- return (first_token, last_token)
- def visit_str(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- return self.handle_str(first_token, last_token)
- def visit_joinedstr(self,
- node, # type: AstNode
- first_token, # type: util.Token
- last_token, # type: util.Token
- ):
- # type: (...) -> Tuple[util.Token, util.Token]
- if sys.version_info < (3, 12):
- # Older versions don't tokenize the contents of f-strings
- return self.handle_str(first_token, last_token)
- last = first_token
- while True:
- if util.match_token(last, getattr(token, "FSTRING_START")):
- # Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
- # of the f-string. We can't just look for the next FSTRING_END
- # because f-strings can be nested, e.g. f"{f'{x}'}", so we need
- # to treat this like matching balanced parentheses.
- count = 1
- while count > 0:
- last = self._code.next_token(last)
- # mypy complains about token.FSTRING_START and token.FSTRING_END.
- if util.match_token(last, getattr(token, "FSTRING_START")):
- count += 1
- elif util.match_token(last, getattr(token, "FSTRING_END")):
- count -= 1
- last_token = last
- last = self._code.next_token(last_token)
- elif util.match_token(last, token.STRING):
- # Similar to handle_str, we also need to handle adjacent strings.
- last_token = last
- last = self._code.next_token(last_token)
- else:
- break
- return (first_token, last_token)
- def visit_bytes(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- return self.handle_str(first_token, last_token)
- def handle_str(self, first_token, last_token):
- # type: (util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # Multiple adjacent STRING tokens form a single string.
- last = self._code.next_token(last_token)
- while util.match_token(last, token.STRING):
- last_token = last
- last = self._code.next_token(last_token)
- return (first_token, last_token)
- def handle_num(self,
- node, # type: AstNode
- value, # type: Union[complex, int, numbers.Number]
- first_token, # type: util.Token
- last_token, # type: util.Token
- ):
- # type: (...) -> Tuple[util.Token, util.Token]
- # A constant like '-1' gets turned into two tokens; this will skip the '-'.
- while util.match_token(last_token, token.OP):
- last_token = self._code.next_token(last_token)
- if isinstance(value, complex):
- # A complex number like -2j cannot be compared directly to 0
- # A complex number like 1-2j is expressed as a binary operation
- # so we don't need to worry about it
- value = value.imag
- # This makes sure that the - is included
- if value < 0 and first_token.type == token.NUMBER: # type: ignore[operator]
- first_token = self._code.prev_token(first_token)
- return (first_token, last_token)
- def visit_num(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- n = node.n # type: ignore[union-attr] # ast.Num has been removed in python 3.14
- assert isinstance(n, (complex, int, numbers.Number))
- return self.handle_num(node, n, first_token, last_token)
- def visit_const(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- assert isinstance(node, ast.Constant) or isinstance(node, nc.Const)
- if isinstance(node.value, numbers.Number):
- return self.handle_num(node, node.value, first_token, last_token)
- elif isinstance(node.value, (str, bytes)):
- return self.visit_str(node, first_token, last_token)
- return (first_token, last_token)
- visit_constant = visit_const
- def visit_keyword(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # Until python 3.9 (https://bugs.python.org/issue40141),
- # ast.keyword nodes didn't have line info. Astroid has lineno None.
- assert isinstance(node, ast.keyword) or isinstance(node, nc.Keyword)
- if node.arg is not None and getattr(node, 'lineno', None) is None:
- equals = self._code.find_token(first_token, token.OP, '=', reverse=True)
- name = self._code.prev_token(equals)
- util.expect_token(name, token.NAME, node.arg)
- first_token = name
- return (first_token, last_token)
- def visit_starred(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # Astroid has 'Starred' nodes (for "foo(*bar)" type args), but they need to be adjusted.
- if not util.match_token(first_token, token.OP, '*'):
- star = self._code.prev_token(first_token)
- if util.match_token(star, token.OP, '*'):
- first_token = star
- return (first_token, last_token)
- def visit_assignname(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- # Astroid may turn 'except' clause into AssignName, but we need to adjust it.
- if util.match_token(first_token, token.NAME, 'except'):
- colon = self._code.find_token(last_token, token.OP, ':')
- first_token = last_token = self._code.prev_token(colon)
- return (first_token, last_token)
- # Async nodes should typically start with the word 'async'
- # but Python < 3.7 doesn't put the col_offset there
- # AsyncFunctionDef is slightly different because it might have
- # decorators before that, which visit_functiondef handles
- def handle_async(self, node, first_token, last_token):
- # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
- if not first_token.string == 'async':
- first_token = self._code.prev_token(first_token)
- return (first_token, last_token)
- visit_asyncfor = handle_async
- visit_asyncwith = handle_async
- def visit_asyncfunctiondef(self,
- node, # type: AstNode
- first_token, # type: util.Token
- last_token, # type: util.Token
- ):
- # type: (...) -> Tuple[util.Token, util.Token]
- if util.match_token(first_token, token.NAME, 'def'):
- # Include the 'async' token
- first_token = self._code.prev_token(first_token)
- return self.visit_functiondef(node, first_token, last_token)
|