tokenutil.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. """Token-related utilities"""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from __future__ import annotations
  5. import itertools
  6. import tokenize
  7. from io import StringIO
  8. from keyword import iskeyword
  9. from tokenize import TokenInfo
  10. from typing import Callable, NamedTuple
  11. from collections.abc import Generator
  12. class Token(NamedTuple):
  13. token: int
  14. text: str
  15. start: int
  16. end: int
  17. line: str
  18. def generate_tokens(readline: Callable) -> Generator[TokenInfo, None, None]:
  19. """wrap generate_tkens to catch EOF errors"""
  20. try:
  21. yield from tokenize.generate_tokens(readline)
  22. except tokenize.TokenError:
  23. # catch EOF error
  24. return
  25. def generate_tokens_catch_errors(
  26. readline, extra_errors_to_catch: list[str] | None = None
  27. ):
  28. default_errors_to_catch = [
  29. "unterminated string literal",
  30. "invalid non-printable character",
  31. "after line continuation character",
  32. ]
  33. assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
  34. errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
  35. tokens: list[TokenInfo] = []
  36. try:
  37. for token in tokenize.generate_tokens(readline):
  38. tokens.append(token)
  39. yield token
  40. except tokenize.TokenError as exc:
  41. if any(error in exc.args[0] for error in errors_to_catch):
  42. if tokens:
  43. start = tokens[-1].start[0], tokens[-1].end[0]
  44. end = start
  45. line = tokens[-1].line
  46. else:
  47. start = end = (1, 0)
  48. line = ""
  49. yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
  50. else:
  51. # Catch EOF
  52. raise
  53. def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
  54. """Return the line in a cell at a given cursor position
  55. Used for calling line-based APIs that don't support multi-line input, yet.
  56. Parameters
  57. ----------
  58. cell : str
  59. multiline block of text
  60. cursor_pos : integer
  61. the cursor position
  62. Returns
  63. -------
  64. (line, offset): (string, integer)
  65. The line with the current cursor, and the character offset of the start of the line.
  66. """
  67. offset = 0
  68. lines = cell.splitlines(True)
  69. for line in lines:
  70. next_offset = offset + len(line)
  71. if not line.endswith("\n"):
  72. # If the last line doesn't have a trailing newline, treat it as if
  73. # it does so that the cursor at the end of the line still counts
  74. # as being on that line.
  75. next_offset += 1
  76. if next_offset > cursor_pos:
  77. break
  78. offset = next_offset
  79. else:
  80. line = ""
  81. return line, offset
  82. def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
  83. """Get the token at a given cursor
  84. Used for introspection.
  85. Function calls are prioritized, so the token for the callable will be returned
  86. if the cursor is anywhere inside the call.
  87. Parameters
  88. ----------
  89. cell : str
  90. A block of Python code
  91. cursor_pos : int
  92. The location of the cursor in the block where the token should be found
  93. """
  94. names: list[str] = []
  95. call_names: list[str] = []
  96. closing_call_name: str | None = None
  97. most_recent_outer_name: str | None = None
  98. offsets = {1: 0} # lines start at 1
  99. intersects_with_cursor = False
  100. cur_token_is_name = False
  101. tokens: list[Token | None] = [
  102. Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
  103. ]
  104. if not tokens:
  105. return ""
  106. for prev_tok, (tok, next_tok) in zip(
  107. [None] + tokens, itertools.pairwise(tokens + [None])
  108. ):
  109. # token, text, start, end, line = tup
  110. start_line, start_col = tok.start
  111. end_line, end_col = tok.end
  112. if end_line + 1 not in offsets:
  113. # keep track of offsets for each line
  114. lines = tok.line.splitlines(True)
  115. for lineno, line in enumerate(lines, start_line + 1):
  116. if lineno not in offsets:
  117. offsets[lineno] = offsets[lineno - 1] + len(line)
  118. closing_call_name = None
  119. offset = offsets[start_line]
  120. if offset + start_col > cursor_pos:
  121. # current token starts after the cursor,
  122. # don't consume it
  123. break
  124. if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
  125. if (
  126. names
  127. and prev_tok
  128. and prev_tok.token == tokenize.OP
  129. and prev_tok.text == "."
  130. ):
  131. names[-1] = "%s.%s" % (names[-1], tok.text)
  132. else:
  133. names.append(tok.text)
  134. if (
  135. next_tok is not None
  136. and next_tok.token == tokenize.OP
  137. and next_tok.text == "="
  138. ):
  139. # don't inspect the lhs of an assignment
  140. names.pop(-1)
  141. cur_token_is_name = False
  142. if not call_names:
  143. most_recent_outer_name = names[-1] if names else None
  144. elif tok.token == tokenize.OP:
  145. if tok.text == "(" and names:
  146. # if we are inside a function call, inspect the function
  147. call_names.append(names[-1])
  148. elif tok.text == ")" and call_names:
  149. # keep track of the most recently popped call_name from the stack
  150. closing_call_name = call_names.pop(-1)
  151. if offsets[end_line] + end_col > cursor_pos:
  152. # we found the cursor, stop reading
  153. # if the current token intersects directly, use it instead of the call token
  154. intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
  155. break
  156. if cur_token_is_name and intersects_with_cursor:
  157. return names[-1]
  158. # if the cursor isn't directly over a name token, use the most recent
  159. # call name if we can find one
  160. elif closing_call_name:
  161. # if we're on a ")", use the most recently popped call name
  162. return closing_call_name
  163. elif call_names:
  164. # otherwise, look for the most recent call name in the stack
  165. return call_names[-1]
  166. elif most_recent_outer_name:
  167. # if we've popped all the call names, use the most recently-seen
  168. # outer name
  169. return most_recent_outer_name
  170. elif names:
  171. # failing that, use the most recently seen name
  172. return names[-1]
  173. else:
  174. # give up
  175. return ""