parser_utils.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. import re
  2. import textwrap
  3. from ast import literal_eval
  4. from inspect import cleandoc
  5. from weakref import WeakKeyDictionary
  6. from parso.python import tree
  7. from parso.cache import parser_cache
  8. from parso import split_lines
  9. _EXECUTE_NODES = {'funcdef', 'classdef', 'import_from', 'import_name', 'test',
  10. 'or_test', 'and_test', 'not_test', 'comparison', 'expr',
  11. 'xor_expr', 'and_expr', 'shift_expr', 'arith_expr',
  12. 'atom_expr', 'term', 'factor', 'power', 'atom'}
  13. _FLOW_KEYWORDS = (
  14. 'try', 'except', 'finally', 'else', 'if', 'elif', 'with', 'for', 'while'
  15. )
  def get_executable_nodes(node, last_added=False):
      """
      Collect the nodes below (and including) ``node`` that are relevant for
      static analysis.

      :param node: Tree node or leaf to inspect; must provide ``type``.
      :param last_added: True when an enclosing statement was already
          collected, which suppresses collecting names and expression nodes
          nested inside it.
      :returns: list of nodes, in traversal order.
      """
      kind = node.type

      if kind == 'name':
          following = node.get_next_leaf()
          # Skip names that are parameters or that are directly followed by `=`.
          wanted = (
              last_added is False
              and node.parent.type != 'param'
              and following != '='
          )
          return [node] if wanted else []

      if kind == 'expr_stmt':
          # Inferring the statement itself (and possibly returned arrays)
          # should be enough for static analysis, so nothing nested in it is
          # added a second time.
          collected = [node]
          for child in node.children:
              collected.extend(get_executable_nodes(child, last_added=True))
          return collected

      if kind == 'decorator':
          # Only a decorator that is called is inspected, and then only the
          # node right in front of the closing parenthesis.
          if node.children[-2] == ')':
              argument = node.children[-3]
              if argument != '(':
                  return get_executable_nodes(argument)
          return []

      try:
          children = node.children
      except AttributeError:
          # Leaves have no children and contribute nothing here.
          return []

      collected = [node] if kind in _EXECUTE_NODES and not last_added else []
      for child in children:
          collected.extend(get_executable_nodes(child, last_added))
      return collected
  def get_sync_comp_fors(comp_for):
      """
      Yield ``comp_for`` followed by every nested comprehension ``for`` node
      reachable through the last child of each clause.

      ``comp_if`` clauses are walked through without being yielded. For a
      ``comp_for`` node the second child is yielded instead of the node
      itself (the first child is the ``async`` keyword).

      :param comp_for: The outermost comprehension ``for`` node.
      :returns: generator of nodes; each nested loop is yielded exactly once.
      """
      yield comp_for
      last = comp_for.children[-1]
      while True:
          if last.type == 'comp_for':
              # Step into the wrapped node before yielding it. Yielding
              # `last.children[1]` and then continuing from `last.children[-1]`
              # would visit the same node again and yield it a second time.
              # NOTE(review): assumes children are exactly [async, sync_comp_for].
              last = last.children[1]
              yield last
          elif last.type == 'sync_comp_for':
              yield last
          elif last.type != 'comp_if':
              break
          last = last.children[-1]
  def for_stmt_defines_one_name(for_stmt):
      """
      Tell whether a for loop binds exactly one plain name.

      ``for x in y`` gives True, while a more complicated target such as
      ``for x, z in y`` gives False.

      :returns: bool
      """
      loop_target = for_stmt.children[1]
      return loop_target.type == 'name'
  def get_flow_branch_keyword(flow_node, node):
      """
      Find the flow keyword of the branch of ``flow_node`` containing ``node``.

      The children of ``flow_node`` are scanned in order, remembering the most
      recent child whose first leaf is one of ``_FLOW_KEYWORDS``. That leaf is
      returned as soon as a child starting after ``node`` is reached.

      :returns: the keyword leaf, or None if no later child exists or no
          keyword was seen before it.
      :raises ValueError: if ``node`` does not start inside ``flow_node``.
      """
      node_start = node.start_pos
      if node_start <= flow_node.start_pos or node_start > flow_node.end_pos:
          raise ValueError('The node is not part of the flow.')

      current_keyword = None
      for child in flow_node.children:
          if node_start < child.start_pos:
              return current_keyword
          leading_leaf = child.get_first_leaf()
          if leading_leaf in _FLOW_KEYWORDS:
              current_keyword = leading_leaf
      return None
  def clean_scope_docstring(scope_node):
      """
      Return the cleaned docstring of ``scope_node``.

      :returns: the literal value of the doc node passed through
          ``inspect.cleandoc``, or ``''`` when the scope has no doc node.
      """
      doc_node = scope_node.get_doc_node()
      if doc_node is None:
          return ''

      # TODO We have to check next leaves until there are no new
      # leaves anymore that might be part of the docstring. A
      # docstring can also look like this: ``'foo' 'bar'``
      literal = safe_literal_eval(doc_node.value)
      return cleandoc(literal)
  def find_statement_documentation(tree_node):
      """
      Return the cleaned string that directly follows an expression statement.

      Only an ``expr_stmt`` whose parent is followed by a ``simple_stmt``
      starting with a ``string`` yields documentation.

      :returns: the cleaned string value, or ``''`` in every other case.
      """
      if tree_node.type != 'expr_stmt':
          return ''

      enclosing_stmt = tree_node.parent  # simple_stmt
      follower = enclosing_stmt.get_next_sibling()
      if follower is None or follower.type != 'simple_stmt':
          return ''

      first_child = follower.children[0]
      if first_child.type != 'string':
          return ''
      return cleandoc(safe_literal_eval(first_child.value))
  def safe_literal_eval(value):
      """
      Evaluate the source text of a literal with ``ast.literal_eval``.

      f-string literals (prefix ``f``, ``fr`` or ``rf``, in any letter case)
      cannot be evaluated statically, so ``''`` is returned for them. An
      empty ``value`` also yields ``''`` instead of failing on the prefix
      check.

      :param value: Source code of the literal, e.g. ``'"text"'``.
      :type value: str
      :returns: the evaluated literal, or ``''`` when nothing can be evaluated.
      """
      if not value:
          return ''
      prefix = value[:2].lower()
      # `startswith('f')` covers both `f` and `fr`; `rf` is the only f-string
      # prefix that does not start with an `f`.
      if prefix.startswith('f') or prefix == 'rf':
          # literal_eval is not able to resolve f literals. We have to do that
          # manually, but that's right now not implemented.
          return ''
      return literal_eval(value)
  def get_signature(funcdef, width=72, call_string=None,
                    omit_first_param=False, omit_return_annotation=False):
      """
      Generate a string signature of a function.

      :param funcdef: Function or lambda node; ``type``, ``get_params()`` and
          ``annotation`` are read, plus ``name.value`` when no ``call_string``
          is given and the node is not a lambda.
      :param width: Fold lines if a line is longer than this value.
      :type width: int
      :param call_string: Text placed in front of the parameter list. Defaults
          to the function name, or ``<lambda>`` for lambdas.
      :type call_string: str
      :param omit_first_param: Leave out the first parameter.
      :type omit_first_param: bool
      :param omit_return_annotation: Leave out the `` -> ...`` part.
      :type omit_return_annotation: bool
      :rtype: str
      """
      if call_string is None:
          # Lambdas have no name.
          call_string = '<lambda>' if funcdef.type == 'lambdef' else funcdef.name.value

      parameters = funcdef.get_params()
      if omit_first_param:
          parameters = parameters[1:]
      joined = ''.join(parameter.get_code() for parameter in parameters).strip()
      # TODO this is pretty bad, we should probably just normalize.
      param_text = re.sub(r'\s+', ' ', '(' + joined + ')')

      return_text = ""
      if funcdef.annotation and not omit_return_annotation:
          return_text = " ->" + funcdef.annotation.get_code()

      folded = textwrap.wrap(call_string + param_text + return_text, width)
      return '\n'.join(folded)
  def move(node, line_offset):
      """
      Shift the ``line`` of every leaf below ``node`` by ``line_offset``.

      Anything exposing ``children`` is treated as an inner node and only
      traversed; anything without it is treated as a leaf and modified in
      place.
      """
      pending = [node]
      while pending:
          current = pending.pop()
          try:
              children = current.children
          except AttributeError:
              current.line += line_offset
          else:
              # Reversed so that popping visits the children left to right.
              pending.extend(reversed(children))
  def get_following_comment_same_line(node):
      """
      Return (as string) the comment found in the prefix that follows the
      node's first line, including the ``#``.

      The prefix is taken from a leaf chosen by node type: for ``for_stmt``,
      ``with_stmt`` and ``funcdef`` a leaf reached through a fixed child
      index, otherwise the leaf after the node's last leaf.

      :returns: the comment up to (excluding) the first line break, or None
          if there is no ``#`` in that prefix or the leaf cannot be reached.
      """
      try:
          kind = node.type
          if kind == 'for_stmt':
              anchor = node.children[5].get_first_leaf()
          elif kind == 'with_stmt':
              anchor = node.children[3].get_first_leaf()
          elif kind == 'funcdef':
              # actually on the next line
              anchor = node.children[4].get_first_leaf().get_next_leaf()
          else:
              anchor = node.get_last_leaf().get_next_leaf()
          prefix = anchor.prefix
      except (AttributeError, ValueError):
          # TODO in some particular cases, the tree doesn't seem to be linked
          # correctly
          return None

      hash_at = prefix.find("#")
      if hash_at == -1:
          return None
      # Keep only what precedes the first carriage return or newline.
      return prefix[hash_at:].partition("\r")[0].partition("\n")[0]
  def is_scope(node):
      """
      Tell whether ``node`` opens its own scope.

      Modules, classes, functions, lambdas and ``sync_comp_for`` nodes count
      as scopes. A ``comp_for`` counts only when its second child is not a
      ``sync_comp_for``.

      :returns: bool
      """
      kind = node.type
      if kind != 'comp_for':
          return kind in ('file_input', 'classdef', 'funcdef', 'lambdef', 'sync_comp_for')
      # Starting with Python 3.8, async is outside of the statement.
      return node.children[1].type != 'sync_comp_for'
  180. def _get_parent_scope_cache(func):
  181. cache = WeakKeyDictionary()
  182. def wrapper(parso_cache_node, node, include_flows=False):
  183. if parso_cache_node is None:
  184. return func(node, include_flows)
  185. try:
  186. for_module = cache[parso_cache_node]
  187. except KeyError:
  188. for_module = cache[parso_cache_node] = {}
  189. try:
  190. return for_module[node]
  191. except KeyError:
  192. result = for_module[node] = func(node, include_flows)
  193. return result
  194. return wrapper
  def get_parent_scope(node, include_flows=False):
      """
      Returns the underlying scope.

      Walks up the parents of ``node`` and returns the first one for which
      ``is_scope`` holds. A class, function or lambda whose ``:`` does not
      start before ``node`` is skipped in favour of an outer scope, unless
      ``node`` is the name of a ``param`` or the first child of a ``tfpdef``.

      :param include_flows: When true, ``tree.Flow`` ancestors are returned
          as well, except for an ``if_stmt`` when ``node`` starts inside one
          of its test nodes.
      :returns: the scope node, or None if ``node`` has no parent.
      """
      candidate = node.parent
      if candidate is None:
          return None  # It's a module already.

      while True:
          if is_scope(candidate):
              if candidate.type in ('classdef', 'funcdef', 'lambdef'):
                  colon = candidate.children[candidate.children.index(':')]
                  if colon.start_pos >= node.start_pos:
                      owner = node.parent
                      defines_name = (
                          (owner.type == 'param' and owner.name == node)
                          or (owner.type == 'tfpdef' and owner.children[0] == node)
                      )
                      if not defines_name:
                          candidate = candidate.parent
                          continue
              return candidate

          if include_flows and isinstance(candidate, tree.Flow):
              # The cursor might be on `if foo`, so the parent scope will not be
              # the if, but the parent of the if.
              on_if_test = candidate.type == 'if_stmt' and any(
                  test.start_pos <= node.start_pos < test.end_pos
                  for test in candidate.get_test_nodes()
              )
              if not on_if_test:
                  return candidate

          candidate = candidate.parent


  # Cached variant of get_parent_scope; takes a parso cache node as an
  # additional first argument.
  get_cached_parent_scope = _get_parent_scope_cache(get_parent_scope)
  def get_cached_code_lines(grammar, path):
      """
      Return the ``lines`` stored on the parso cache node for ``grammar`` and
      ``path``.

      This is not the nicest way to get at them, but it avoids splitting all
      the lines again.
      """
      cache_node = get_parso_cache_node(grammar, path)
      return cache_node.lines
  def get_parso_cache_node(grammar, path):
      """
      Look up the entry for ``path`` in parso's ``parser_cache``, under the
      grammar's ``_hashed`` key.

      This is of course not public. But as long as I control parso, this
      shouldn't be a problem. ~ Dave

      The reason for this is mostly caching. This is obviously also a sign of
      a broken caching architecture.
      """
      entries_for_grammar = parser_cache[grammar._hashed]
      return entries_for_grammar[path]
  def cut_value_at_position(leaf, position):
      """
      Cuts off the value of the leaf at position.

      :param leaf: Leaf providing ``value``, ``line`` and ``column``.
      :param position: ``(line, column)`` at which the value ends.
      :returns: the part of ``leaf.value`` in front of ``position``, or ``''``
          if no line of the value lies in front of it.
      """
      target_row = position[0]
      kept = split_lines(leaf.value, keepends=True)[:target_row - leaf.line + 1]

      cut_column = position[1]
      if target_row == leaf.line:
          # On the leaf's first line the column is relative to the leaf start.
          cut_column -= leaf.column

      if not kept:
          return ''
      return ''.join(kept[:-1]) + kept[-1][:cut_column]
  def expr_is_dotted(node):
      """
      Checks if a path looks like `name` or `name.foo.bar` and not `name()`.

      An ``atom`` of three children starting with ``(`` is judged by its
      middle child, and an ``await`` expression is never dotted.

      :returns: bool
      """
      kind = node.type
      if kind == 'atom':
          parts = node.children
          is_parenthesized = len(parts) == 3 and parts[0] == '('
          return expr_is_dotted(parts[1]) if is_parenthesized else False

      if kind == 'atom_expr':
          parts = node.children
          head = parts[0]
          if head == 'await' or not expr_is_dotted(head):
              return False
          # Every trailer has to be an attribute access.
          return all(trailer.children[0] == '.' for trailer in parts[1:])

      return kind == 'name'
  def _function_is_x_method(decorator_checker):
      """
      Build a predicate telling whether a function node has a decorator whose
      code is accepted by ``decorator_checker``.
      """
      def wrapper(function_node):
          """
          This is a heuristic. It will not hold ALL the times, but it will be
          correct pretty much for anyone that doesn't try to beat it.
          staticmethod/classmethod are builtins and unless overwritten, this will
          be correct.
          """
          # The second child of a decorator is compared by its source code.
          return any(
              decorator_checker(decorator.children[1].get_code())
              for decorator in function_node.get_decorators()
          )
      return wrapper


  function_is_staticmethod = _function_is_x_method(
      lambda code: code == "staticmethod"
  )
  function_is_classmethod = _function_is_x_method(
      lambda code: code == "classmethod"
  )
  function_is_property = _function_is_x_method(
      lambda code: code in ("property", "cached_property")
      or code.endswith(".setter")
  )