references.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. import os
  2. import re
  3. from parso import python_bytes_to_unicode
  4. from jedi.debug import dbg
  5. from jedi.file_io import KnownContentFileIO, FolderIO
  6. from jedi.inference.names import SubModuleName
  7. from jedi.inference.imports import load_module_from_path
  8. from jedi.inference.filters import ParserTreeFilter
  9. from jedi.inference.gradual.conversion import convert_names
  10. _IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')
  11. _OPENED_FILE_LIMIT = 2000
  12. """
  13. Stats from a 2016 Lenovo Notebook running Linux:
  14. With os.walk, it takes about 10s to scan 11'000 files (without filesystem
  15. caching). Once cached it only takes 5s. So it is expected that reading all
  16. those files might take a few seconds, but not a lot more.
  17. """
  18. _PARSED_FILE_LIMIT = 30
  19. """
  20. For now we keep the amount of parsed files really low, since parsing might take
  21. easily 100ms for bigger files.
  22. """
  23. def _resolve_names(definition_names, avoid_names=()):
  24. for name in definition_names:
  25. if name in avoid_names:
  26. # Avoiding recursions here, because goto on a module name lands
  27. # on the same module.
  28. continue
  29. if not isinstance(name, SubModuleName):
  30. # SubModuleNames are not actually existing names but created
  31. # names when importing something like `import foo.bar.baz`.
  32. yield name
  33. if name.api_type == 'module':
  34. yield from _resolve_names(name.goto(), definition_names)
  35. def _dictionarize(names):
  36. return dict(
  37. (n if n.tree_name is None else n.tree_name, n)
  38. for n in names
  39. )
  40. def _find_defining_names(module_context, tree_name):
  41. found_names = _find_names(module_context, tree_name)
  42. for name in list(found_names):
  43. # Convert from/to stubs, because those might also be usages.
  44. found_names |= set(convert_names(
  45. [name],
  46. only_stubs=not name.get_root_context().is_stub(),
  47. prefer_stub_to_compiled=False
  48. ))
  49. found_names |= set(_find_global_variables(found_names, tree_name.value))
  50. for name in list(found_names):
  51. if name.api_type == 'param' or name.tree_name is None \
  52. or name.tree_name.parent.type == 'trailer':
  53. continue
  54. found_names |= set(_add_names_in_same_context(name.parent_context, name.string_name))
  55. return set(_resolve_names(found_names))
  56. def _find_names(module_context, tree_name):
  57. name = module_context.create_name(tree_name)
  58. found_names = set(name.goto())
  59. found_names.add(name)
  60. return set(_resolve_names(found_names))
  61. def _add_names_in_same_context(context, string_name):
  62. if context.tree_node is None:
  63. return
  64. until_position = None
  65. while True:
  66. filter_ = ParserTreeFilter(
  67. parent_context=context,
  68. until_position=until_position,
  69. )
  70. names = set(filter_.get(string_name))
  71. if not names:
  72. break
  73. yield from names
  74. ordered = sorted(names, key=lambda x: x.start_pos)
  75. until_position = ordered[0].start_pos
  76. def _find_global_variables(names, search_name):
  77. for name in names:
  78. if name.tree_name is None:
  79. continue
  80. module_context = name.get_root_context()
  81. try:
  82. method = module_context.get_global_filter
  83. except AttributeError:
  84. continue
  85. else:
  86. for global_name in method().get(search_name):
  87. yield global_name
  88. c = module_context.create_context(global_name.tree_name)
  89. yield from _add_names_in_same_context(c, global_name.string_name)
  90. def find_references(module_context, tree_name, only_in_module=False):
  91. inf = module_context.inference_state
  92. search_name = tree_name.value
  93. # We disable flow analysis, because if we have ifs that are only true in
  94. # certain cases, we want both sides.
  95. try:
  96. inf.flow_analysis_enabled = False
  97. found_names = _find_defining_names(module_context, tree_name)
  98. finally:
  99. inf.flow_analysis_enabled = True
  100. found_names_dct = _dictionarize(found_names)
  101. module_contexts = [module_context]
  102. if not only_in_module:
  103. for m in set(d.get_root_context() for d in found_names):
  104. if m != module_context and m.tree_node is not None \
  105. and inf.project.path in m.py__file__().parents:
  106. module_contexts.append(m)
  107. # For param no search for other modules is necessary.
  108. if only_in_module or any(n.api_type == 'param' for n in found_names):
  109. potential_modules = module_contexts
  110. else:
  111. potential_modules = get_module_contexts_containing_name(
  112. inf,
  113. module_contexts,
  114. search_name,
  115. )
  116. non_matching_reference_maps = {}
  117. for module_context in potential_modules:
  118. for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
  119. new = _dictionarize(_find_names(module_context, name_leaf))
  120. if any(tree_name in found_names_dct for tree_name in new):
  121. found_names_dct.update(new)
  122. for tree_name in new:
  123. for dct in non_matching_reference_maps.get(tree_name, []):
  124. # A reference that was previously searched for matches
  125. # with a now found name. Merge.
  126. found_names_dct.update(dct)
  127. try:
  128. del non_matching_reference_maps[tree_name]
  129. except KeyError:
  130. pass
  131. else:
  132. for name in new:
  133. non_matching_reference_maps.setdefault(name, []).append(new)
  134. result = found_names_dct.values()
  135. if only_in_module:
  136. return [n for n in result if n.get_root_context() == module_context]
  137. return result
  138. def _check_fs(inference_state, file_io, regex):
  139. try:
  140. code = file_io.read()
  141. except FileNotFoundError:
  142. return None
  143. code = python_bytes_to_unicode(code, errors='replace')
  144. if not regex.search(code):
  145. return None
  146. new_file_io = KnownContentFileIO(file_io.path, code)
  147. m = load_module_from_path(inference_state, new_file_io)
  148. if m.is_compiled():
  149. return None
  150. return m.as_context()
  151. def gitignored_paths(folder_io, file_io):
  152. ignored_paths_abs = set()
  153. ignored_paths_rel = set()
  154. for l in file_io.read().splitlines():
  155. if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
  156. continue
  157. p = l.decode('utf-8', 'ignore').rstrip('/')
  158. if '/' in p:
  159. name = p.lstrip('/')
  160. ignored_paths_abs.add(os.path.join(folder_io.path, name))
  161. else:
  162. name = p
  163. ignored_paths_rel.add((folder_io.path, name))
  164. return ignored_paths_abs, ignored_paths_rel
  165. def expand_relative_ignore_paths(folder_io, relative_paths):
  166. curr_path = folder_io.path
  167. return {os.path.join(curr_path, p[1]) for p in relative_paths if curr_path.startswith(p[0])}
  168. def recurse_find_python_folders_and_files(folder_io, except_paths=()):
  169. except_paths = set(except_paths)
  170. except_paths_relative = set()
  171. for root_folder_io, folder_ios, file_ios in folder_io.walk():
  172. # Delete folders that we don't want to iterate over.
  173. for file_io in file_ios:
  174. path = file_io.path
  175. if path.suffix in ('.py', '.pyi'):
  176. if path not in except_paths:
  177. yield None, file_io
  178. if path.name == '.gitignore':
  179. ignored_paths_abs, ignored_paths_rel = gitignored_paths(
  180. root_folder_io, file_io
  181. )
  182. except_paths |= ignored_paths_abs
  183. except_paths_relative |= ignored_paths_rel
  184. except_paths_relative_expanded = expand_relative_ignore_paths(
  185. root_folder_io, except_paths_relative
  186. )
  187. folder_ios[:] = [
  188. folder_io
  189. for folder_io in folder_ios
  190. if folder_io.path not in except_paths
  191. and folder_io.path not in except_paths_relative_expanded
  192. and folder_io.get_base_name() not in _IGNORE_FOLDERS
  193. ]
  194. for folder_io in folder_ios:
  195. yield folder_io, None
  196. def recurse_find_python_files(folder_io, except_paths=()):
  197. for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
  198. if file_io is not None:
  199. yield file_io
  200. def _find_python_files_in_sys_path(inference_state, module_contexts):
  201. sys_path = inference_state.get_sys_path()
  202. except_paths = set()
  203. yielded_paths = [m.py__file__() for m in module_contexts]
  204. for module_context in module_contexts:
  205. file_io = module_context.get_value().file_io
  206. if file_io is None:
  207. continue
  208. folder_io = file_io.get_parent_folder()
  209. while True:
  210. path = folder_io.path
  211. if not any(path.startswith(p) for p in sys_path) or path in except_paths:
  212. break
  213. for file_io in recurse_find_python_files(folder_io, except_paths):
  214. if file_io.path not in yielded_paths:
  215. yield file_io
  216. except_paths.add(path)
  217. folder_io = folder_io.get_parent_folder()
  218. def _find_project_modules(inference_state, module_contexts):
  219. except_ = [m.py__file__() for m in module_contexts]
  220. yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)
  221. def get_module_contexts_containing_name(inference_state, module_contexts, name,
  222. limit_reduction=1):
  223. """
  224. Search a name in the directories of modules.
  225. :param limit_reduction: Divides the limits on opening/parsing files by this
  226. factor.
  227. """
  228. # Skip non python modules
  229. for module_context in module_contexts:
  230. if module_context.is_compiled():
  231. continue
  232. yield module_context
  233. # Very short names are not searched in other modules for now to avoid lots
  234. # of file lookups.
  235. if len(name) <= 2:
  236. return
  237. # Currently not used, because there's only `scope=project` and `scope=file`
  238. # At the moment there is no such thing as `scope=sys.path`.
  239. # file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
  240. file_io_iterator = _find_project_modules(inference_state, module_contexts)
  241. yield from search_in_file_ios(inference_state, file_io_iterator, name,
  242. limit_reduction=limit_reduction)
  243. def search_in_file_ios(inference_state, file_io_iterator, name,
  244. limit_reduction=1, complete=False):
  245. parse_limit = _PARSED_FILE_LIMIT / limit_reduction
  246. open_limit = _OPENED_FILE_LIMIT / limit_reduction
  247. file_io_count = 0
  248. parsed_file_count = 0
  249. regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
  250. for file_io in file_io_iterator:
  251. file_io_count += 1
  252. m = _check_fs(inference_state, file_io, regex)
  253. if m is not None:
  254. parsed_file_count += 1
  255. yield m
  256. if parsed_file_count >= parse_limit:
  257. dbg('Hit limit of parsed files: %s', parse_limit)
  258. break
  259. if file_io_count >= open_limit:
  260. dbg('Hit limit of opened files: %s', open_limit)
  261. break