| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
- import os
- import re
- from parso import python_bytes_to_unicode
- from jedi.debug import dbg
- from jedi.file_io import KnownContentFileIO, FolderIO
- from jedi.inference.names import SubModuleName
- from jedi.inference.imports import load_module_from_path
- from jedi.inference.filters import ParserTreeFilter
- from jedi.inference.gradual.conversion import convert_names
# Base names of folders that are skipped while walking a project tree.
_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')

# Upper bound on the number of files that get opened during one search.
#
# Stats from a 2016 Lenovo Notebook running Linux:
# With os.walk, it takes about 10s to scan 11'000 files (without filesystem
# caching). Once cached it only takes 5s. So it is expected that reading all
# those files might take a few seconds, but not a lot more.
_OPENED_FILE_LIMIT = 2000

# Upper bound on the number of files that get parsed during one search.
#
# For now we keep the amount of parsed files really low, since parsing might
# take easily 100ms for bigger files.
_PARSED_FILE_LIMIT = 30
def _resolve_names(definition_names, avoid_names=()):
    """
    Yield the given names, following module names to what they point at.

    :param definition_names: The names to walk through.
    :param avoid_names: Names that are skipped entirely.
    """
    for candidate in definition_names:
        # Avoiding recursions here, because goto on a module name lands on
        # the same module.
        if candidate not in avoid_names:
            # SubModuleNames are not actually existing names but created
            # names when importing something like `import foo.bar.baz`, so
            # they are never yielded themselves.
            if not isinstance(candidate, SubModuleName):
                yield candidate

            if candidate.api_type == 'module':
                # The names of the current level become the names to avoid
                # one level deeper.
                yield from _resolve_names(candidate.goto(), definition_names)
def _dictionarize(names):
    """
    Map each name to itself, keyed by its tree name where it has one.

    A name whose ``tree_name`` is ``None`` is used as its own key. If
    several names share a key, the last one wins.
    """
    return {
        (name if name.tree_name is None else name.tree_name): name
        for name in names
    }
def _find_defining_names(module_context, tree_name):
    """
    Collect the set of names that define what ``tree_name`` refers to.

    Starts from the names found for ``tree_name`` itself and widens the set
    with stub/non-stub counterparts, matching global variables and names of
    the same string in the same context, before resolving module names.
    """
    collected = _find_names(module_context, tree_name)

    # Convert from/to stubs, because those might also be usages. Iterate over
    # a snapshot, since the set grows while we go.
    for name in tuple(collected):
        wants_stubs = not name.get_root_context().is_stub()
        converted = convert_names(
            [name],
            only_stubs=wants_stubs,
            prefer_stub_to_compiled=False,
        )
        collected.update(set(converted))

    # The generator iterates over ``collected``, so it has to be exhausted
    # before the set is modified.
    global_names = set(_find_global_variables(collected, tree_name.value))
    collected.update(global_names)

    for name in tuple(collected):
        is_param = name.api_type == 'param'
        if is_param or name.tree_name is None:
            continue
        if name.tree_name.parent.type == 'trailer':
            continue
        same_context = _add_names_in_same_context(
            name.parent_context, name.string_name
        )
        collected.update(set(same_context))

    return set(_resolve_names(collected))
def _find_names(module_context, tree_name):
    """
    Return the resolved set of the name for ``tree_name`` plus its goto
    results.
    """
    origin = module_context.create_name(tree_name)
    # The goto results come first, so that they take precedence over an
    # equal ``origin`` in the set.
    candidates = {*origin.goto(), origin}
    return set(_resolve_names(candidates))
def _add_names_in_same_context(context, string_name):
    """
    Yield all names called ``string_name`` that a tree filter on ``context``
    finds.

    The filter is queried repeatedly, each time limited to positions before
    the earliest name of the previous round, until nothing is found anymore.
    Nothing is yielded for contexts without a tree node.
    """
    if context.tree_node is None:
        return

    upper_bound = None
    while True:
        scope_filter = ParserTreeFilter(
            parent_context=context,
            until_position=upper_bound,
        )
        matches = set(scope_filter.get(string_name))
        if not matches:
            return

        yield from matches
        # Continue the search above the earliest match of this round.
        upper_bound = min(match.start_pos for match in matches)
def _find_global_variables(names, search_name):
    """
    Yield the global names called ``search_name`` in the modules of
    ``names``.

    For every global name found, the names of the same string in the context
    created for it are yielded as well. Names without a tree name and root
    contexts without a ``get_global_filter`` attribute are skipped.
    """
    for name in names:
        if name.tree_name is None:
            continue

        root_context = name.get_root_context()
        get_filter = getattr(root_context, 'get_global_filter', None)
        if get_filter is None:
            # This root context does not provide a global filter.
            continue

        for global_name in get_filter().get(search_name):
            yield global_name
            scope = root_context.create_context(global_name.tree_name)
            yield from _add_names_in_same_context(scope, global_name.string_name)
def find_references(module_context, tree_name, only_in_module=False):
    """
    Find the names that reference the same thing as ``tree_name``.

    :param module_context: Context of the module that contains ``tree_name``.
    :param tree_name: The name leaf to search references for.
    :param only_in_module: If true, no other modules are searched and only
        names whose root context is ``module_context`` are returned.
    :return: The found names; a list if ``only_in_module`` is set, otherwise
        the values view of the internal name mapping.
    """
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have ifs that are only true in
    # certain cases, we want both sides. The previous setting is restored
    # afterwards (instead of blindly switching it on again), so a caller that
    # had already disabled flow analysis keeps its setting.
    previous_flow_analysis = getattr(inf, 'flow_analysis_enabled', True)
    inf.flow_analysis_enabled = False
    try:
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        inf.flow_analysis_enabled = previous_flow_analysis

    found_names_dct = _dictionarize(found_names)

    module_contexts = [module_context]
    if not only_in_module:
        # Modules of the definitions are searched as well, as long as they
        # have a tree and live inside the project.
        for m in set(d.get_root_context() for d in found_names):
            if m != module_context and m.tree_node is not None \
                    and inf.project.path in m.py__file__().parents:
                module_contexts.append(m)

    # For param no search for other modules is necessary.
    if only_in_module or any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    # Maps a key to the name mappings that contained it, but did not match
    # any found name at the time they were looked at.
    non_matching_reference_maps = {}
    # The loop variables deliberately do not reuse the parameter names; the
    # `only_in_module` filter below needs the original `module_context`.
    for potential_module in potential_modules:
        used_names = potential_module.tree_node.get_used_names()
        for name_leaf in used_names.get(search_name, []):
            new = _dictionarize(_find_names(potential_module, name_leaf))
            if any(key in found_names_dct for key in new):
                found_names_dct.update(new)
                for key in new:
                    # A reference that was previously searched for matches
                    # with a now found name. Merge.
                    for dct in non_matching_reference_maps.pop(key, []):
                        found_names_dct.update(dct)
            else:
                for key in new:
                    non_matching_reference_maps.setdefault(key, []).append(new)

    result = found_names_dct.values()
    if only_in_module:
        return [n for n in result if n.get_root_context() == module_context]
    return result
def _check_fs(inference_state, file_io, regex):
    """
    Load the file behind ``file_io`` as a module context if ``regex``
    matches.

    :param inference_state: Passed on to the module loader.
    :param file_io: Object with a ``read()`` method and a ``path``.
    :param regex: Compiled pattern that is searched in the decoded content.
    :return: The module context, or ``None`` if the file cannot be read,
        the pattern is not found or the loaded module is compiled.
    """
    try:
        code = file_io.read()
    except OSError:
        # Covers files that vanished since they were listed
        # (FileNotFoundError) as well as e.g. unreadable ones
        # (PermissionError). A single bad file must not abort the search.
        return None

    code = python_bytes_to_unicode(code, errors='replace')
    if not regex.search(code):
        return None

    # Reuse the content that was just read instead of reading it again.
    new_file_io = KnownContentFileIO(file_io.path, code)
    m = load_module_from_path(inference_state, new_file_io)
    if m.is_compiled():
        return None
    return m.as_context()
def gitignored_paths(folder_io, file_io):
    """
    Extract the plain paths that an ignore file lists.

    Empty lines, lines starting with ``#`` or ``!`` and lines containing
    ``*`` are skipped.

    :param folder_io: The folder that contains the ignore file.
    :param file_io: The ignore file; ``read()`` is expected to give bytes.
    :return: A tuple of two sets: paths joined onto the folder path (for
        entries containing a slash), and ``(folder path, name)`` tuples (for
        entries without one).
    """
    absolute = set()
    relative = set()

    for raw_line in file_io.read().splitlines():
        if not raw_line or raw_line.startswith((b'#', b'!')) or b'*' in raw_line:
            continue

        entry = raw_line.decode('utf-8', 'ignore').rstrip('/')
        if '/' in entry:
            absolute.add(os.path.join(folder_io.path, entry.lstrip('/')))
        else:
            relative.add((folder_io.path, entry))

    return absolute, relative
def expand_relative_ignore_paths(folder_io, relative_paths):
    """
    Join relative ignore names onto the path of ``folder_io``.

    :param folder_io: The folder the names are expanded for.
    :param relative_paths: Iterable of ``(origin path, name)`` entries. Only
        entries whose origin path is a string prefix of the folder path are
        used.
    :return: The set of joined paths.
    """
    here = folder_io.path
    expanded = set()
    for entry in relative_paths:
        origin, name = entry[0], entry[1]
        if here.startswith(origin):
            expanded.add(os.path.join(here, name))
    return expanded
def recurse_find_python_folders_and_files(folder_io, except_paths=()):
    """
    Walk ``folder_io`` and yield its Python files and its folders.

    Files ending in ``.py``/``.pyi`` are yielded as ``(None, file_io)``,
    folders as ``(folder_io, None)``. Folders are neither yielded nor walked
    into if their path is excluded or their base name is in
    ``_IGNORE_FOLDERS``; files are not yielded if their path is excluded.
    The exclusions grow with the entries of every ``.gitignore`` that is
    encountered.

    :param except_paths: Paths that are excluded from the start.
    """
    skipped_abs = set(except_paths)
    skipped_rel = set()

    for root_io, sub_folder_ios, file_ios in folder_io.walk():
        for file_io in file_ios:
            path = file_io.path
            if path.suffix in ('.py', '.pyi') and path not in skipped_abs:
                yield None, file_io

            if path.name == '.gitignore':
                new_abs, new_rel = gitignored_paths(root_io, file_io)
                skipped_abs.update(new_abs)
                skipped_rel.update(new_rel)

        skipped_here = expand_relative_ignore_paths(root_io, skipped_rel)

        # Delete folders that we don't want to iterate over. The list is
        # changed in place, which is presumably what makes the walk skip
        # them.
        kept = []
        for sub_folder_io in sub_folder_ios:
            sub_path = sub_folder_io.path
            if sub_path in skipped_abs or sub_path in skipped_here:
                continue
            if sub_folder_io.get_base_name() in _IGNORE_FOLDERS:
                continue
            kept.append(sub_folder_io)
        sub_folder_ios[:] = kept

        for sub_folder_io in sub_folder_ios:
            yield sub_folder_io, None
def recurse_find_python_files(folder_io, except_paths=()):
    """
    Yield only the file entries of
    ``recurse_find_python_folders_and_files``.
    """
    entries = recurse_find_python_folders_and_files(folder_io, except_paths)
    yield from (file_io for _, file_io in entries if file_io is not None)
def _find_python_files_in_sys_path(inference_state, module_contexts):
    """
    Yield Python files around the given modules that are inside the sys
    path.

    Starting at the folder of each module, the folders are searched upwards
    for as long as their path starts with a sys path entry. The files of
    ``module_contexts`` themselves are not yielded and no folder is searched
    twice.
    """
    search_roots = inference_state.get_sys_path()
    searched_folders = set()
    known_files = [m.py__file__() for m in module_contexts]

    def inside_sys_path(folder_path):
        return any(folder_path.startswith(root) for root in search_roots)

    for module_context in module_contexts:
        module_file_io = module_context.get_value().file_io
        if module_file_io is None:
            continue

        current = module_file_io.get_parent_folder()
        folder_path = current.path
        while inside_sys_path(folder_path) and folder_path not in searched_folders:
            for candidate in recurse_find_python_files(current, searched_folders):
                if candidate.path not in known_files:
                    yield candidate
            searched_folders.add(folder_path)
            # Move one folder up.
            current = current.get_parent_folder()
            folder_path = current.path
def _find_project_modules(inference_state, module_contexts):
    """
    Yield the Python files below the project path, leaving out the files of
    ``module_contexts``.
    """
    known_files = [m.py__file__() for m in module_contexts]
    project_folder = FolderIO(inference_state.project.path)
    yield from recurse_find_python_files(project_folder, known_files)
def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search a name in the directories of modules.

    The given module contexts are yielded first (compiled ones are left
    out), followed by the module contexts of project files that contain the
    name.

    :param limit_reduction: Divides the limits on opening/parsing files by this
        factor.
    """
    # Skip non python modules
    yield from (m for m in module_contexts if not m.is_compiled())

    # Very short names are not searched in other modules for now to avoid lots
    # of file lookups.
    if len(name) > 2:
        # Only the project is searched: there's only `scope=project` and
        # `scope=file`, at the moment there is no such thing as
        # `scope=sys.path`.
        project_file_ios = _find_project_modules(inference_state, module_contexts)
        yield from search_in_file_ios(
            inference_state,
            project_file_ios,
            name,
            limit_reduction=limit_reduction,
        )
def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1, complete=False):
    """
    Yield the module contexts of the files that contain ``name``.

    Stops as soon as the number of yielded modules or the number of looked at
    files reaches its limit.

    :param limit_reduction: Divides the limits on opening/parsing files by this
        factor.
    :param complete: If true, ``name`` only has to be the start of a word
        instead of a whole word.
    """
    max_parsed = _PARSED_FILE_LIMIT / limit_reduction
    max_opened = _OPENED_FILE_LIMIT / limit_reduction
    word_end = r'' if complete else r'\b'
    pattern = re.compile(r'\b' + re.escape(name) + word_end)

    parsed = 0
    for opened, file_io in enumerate(file_io_iterator, start=1):
        module_context = _check_fs(inference_state, file_io, pattern)
        if module_context is not None:
            parsed += 1
            yield module_context
            if parsed >= max_parsed:
                dbg('Hit limit of parsed files: %s', max_parsed)
                break

        if opened >= max_opened:
            dbg('Hit limit of opened files: %s', max_opened)
            break
|