| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import os
- import re
- from pathlib import Path
- from importlib.machinery import all_suffixes
- from jedi.inference.cache import inference_state_method_cache
- from jedi.inference.base_value import ContextualizedNode
- from jedi.inference.helpers import is_string, get_str_or_none
- from jedi.parser_utils import get_cached_code_lines
- from jedi.file_io import FileIO
- from jedi import settings
- from jedi import debug
- _BUILDOUT_PATH_INSERTION_LIMIT = 10
- def _abs_path(module_context, str_path: str):
- path = Path(str_path)
- if path.is_absolute():
- return path
- module_path = module_context.py__file__()
- if module_path is None:
- # In this case we have no idea where we actually are in the file
- # system.
- return None
- base_dir = module_path.parent
- return base_dir.joinpath(path).absolute()
- def _paths_from_assignment(module_context, expr_stmt):
- """
- Extracts the assigned strings from an assignment that looks as follows::
- sys.path[0:0] = ['module/path', 'another/module/path']
- This function is in general pretty tolerant (and therefore 'buggy').
- However, it's not a big issue usually to add more paths to Jedi's sys_path,
- because it will only affect Jedi in very random situations and by adding
- more paths than necessary, it usually benefits the general user.
- """
- for assignee, operator in zip(expr_stmt.children[::2], expr_stmt.children[1::2]):
- try:
- assert operator in ['=', '+=']
- assert assignee.type in ('power', 'atom_expr') and \
- len(assignee.children) > 1
- c = assignee.children
- assert c[0].type == 'name' and c[0].value == 'sys'
- trailer = c[1]
- assert trailer.children[0] == '.' and trailer.children[1].value == 'path'
- # TODO Essentially we're not checking details on sys.path
- # manipulation. Both assigment of the sys.path and changing/adding
- # parts of the sys.path are the same: They get added to the end of
- # the current sys.path.
- """
- execution = c[2]
- assert execution.children[0] == '['
- subscript = execution.children[1]
- assert subscript.type == 'subscript'
- assert ':' in subscript.children
- """
- except AssertionError:
- continue
- cn = ContextualizedNode(module_context.create_context(expr_stmt), expr_stmt)
- for lazy_value in cn.infer().iterate(cn):
- for value in lazy_value.infer():
- if is_string(value):
- abs_path = _abs_path(module_context, value.get_safe_value())
- if abs_path is not None:
- yield abs_path
- def _paths_from_list_modifications(module_context, trailer1, trailer2):
- """ extract the path from either "sys.path.append" or "sys.path.insert" """
- # Guarantee that both are trailers, the first one a name and the second one
- # a function execution with at least one param.
- if not (trailer1.type == 'trailer' and trailer1.children[0] == '.'
- and trailer2.type == 'trailer' and trailer2.children[0] == '('
- and len(trailer2.children) == 3):
- return
- name = trailer1.children[1].value
- if name not in ['insert', 'append']:
- return
- arg = trailer2.children[1]
- if name == 'insert' and len(arg.children) in (3, 4): # Possible trailing comma.
- arg = arg.children[2]
- for value in module_context.create_context(arg).infer_node(arg):
- p = get_str_or_none(value)
- if p is None:
- continue
- abs_path = _abs_path(module_context, p)
- if abs_path is not None:
- yield abs_path
- @inference_state_method_cache(default=[])
- def check_sys_path_modifications(module_context):
- """
- Detect sys.path modifications within module.
- """
- def get_sys_path_powers(names):
- for name in names:
- power = name.parent.parent
- if power is not None and power.type in ('power', 'atom_expr'):
- c = power.children
- if c[0].type == 'name' and c[0].value == 'sys' \
- and c[1].type == 'trailer':
- n = c[1].children[1]
- if n.type == 'name' and n.value == 'path':
- yield name, power
- if module_context.tree_node is None:
- return []
- added = []
- try:
- possible_names = module_context.tree_node.get_used_names()['path']
- except KeyError:
- pass
- else:
- for name, power in get_sys_path_powers(possible_names):
- expr_stmt = power.parent
- if len(power.children) >= 4:
- added.extend(
- _paths_from_list_modifications(
- module_context, *power.children[2:4]
- )
- )
- elif expr_stmt is not None and expr_stmt.type == 'expr_stmt':
- added.extend(_paths_from_assignment(module_context, expr_stmt))
- return added
- def discover_buildout_paths(inference_state, script_path):
- buildout_script_paths = set()
- for buildout_script_path in _get_buildout_script_paths(script_path):
- for path in _get_paths_from_buildout_script(inference_state, buildout_script_path):
- buildout_script_paths.add(path)
- if len(buildout_script_paths) >= _BUILDOUT_PATH_INSERTION_LIMIT:
- break
- return buildout_script_paths
- def _get_paths_from_buildout_script(inference_state, buildout_script_path):
- file_io = FileIO(str(buildout_script_path))
- try:
- module_node = inference_state.parse(
- file_io=file_io,
- cache=True,
- cache_path=settings.cache_directory
- )
- except IOError:
- debug.warning('Error trying to read buildout_script: %s', buildout_script_path)
- return
- from jedi.inference.value import ModuleValue
- module_context = ModuleValue(
- inference_state, module_node,
- file_io=file_io,
- string_names=None,
- code_lines=get_cached_code_lines(inference_state.grammar, buildout_script_path),
- ).as_context()
- yield from check_sys_path_modifications(module_context)
- def _get_parent_dir_with_file(path: Path, filename):
- for parent in path.parents:
- try:
- if parent.joinpath(filename).is_file():
- return parent
- except OSError:
- continue
- return None
- def _get_buildout_script_paths(search_path: Path):
- """
- if there is a 'buildout.cfg' file in one of the parent directories of the
- given module it will return a list of all files in the buildout bin
- directory that look like python files.
- :param search_path: absolute path to the module.
- """
- project_root = _get_parent_dir_with_file(search_path, 'buildout.cfg')
- if not project_root:
- return
- bin_path = project_root.joinpath('bin')
- if not bin_path.exists():
- return
- for filename in os.listdir(bin_path):
- try:
- filepath = bin_path.joinpath(filename)
- with open(filepath, 'r') as f:
- firstline = f.readline()
- if firstline.startswith('#!') and 'python' in firstline:
- yield filepath
- except (UnicodeDecodeError, IOError) as e:
- # Probably a binary file; permission error or race cond. because
- # file got deleted. Ignore it.
- debug.warning(str(e))
- continue
- def remove_python_path_suffix(path):
- for suffix in all_suffixes() + ['.pyi']:
- if path.suffix == suffix:
- path = path.with_name(path.stem)
- break
- return path
- def transform_path_to_dotted(sys_path, module_path):
- """
- Returns the dotted path inside a sys.path as a list of names. e.g.
- >>> transform_path_to_dotted([str(Path("/foo").absolute())], Path('/foo/bar/baz.py').absolute())
- (('bar', 'baz'), False)
- Returns (None, False) if the path doesn't really resolve to anything.
- The second return part is if it is a package.
- """
- # First remove the suffix.
- module_path = remove_python_path_suffix(module_path)
- if module_path.name.startswith('.'):
- return None, False
- # Once the suffix was removed we are using the files as we know them. This
- # means that if someone uses an ending like .vim for a Python file, .vim
- # will be part of the returned dotted part.
- is_package = module_path.name == '__init__'
- if is_package:
- module_path = module_path.parent
- def iter_potential_solutions():
- for p in sys_path:
- if str(module_path).startswith(p):
- # Strip the trailing slash/backslash
- rest = str(module_path)[len(p):]
- # On Windows a path can also use a slash.
- if rest.startswith(os.path.sep) or rest.startswith('/'):
- # Remove a slash in cases it's still there.
- rest = rest[1:]
- if rest:
- split = rest.split(os.path.sep)
- if not all(split):
- # This means that part of the file path was empty, this
- # is very strange and is probably a file that is called
- # `.py`.
- return
- # Stub folders for foo can end with foo-stubs. Just remove
- # it.
- yield tuple(re.sub(r'-stubs$', '', s) for s in split)
- potential_solutions = tuple(iter_potential_solutions())
- if not potential_solutions:
- return None, False
- # Try to find the shortest path, this makes more sense usually, because the
- # user usually has venvs somewhere. This means that a path like
- # .tox/py37/lib/python3.7/os.py can be normal for a file. However in that
- # case we definitely want to return ['os'] as a path and not a crazy
- # ['.tox', 'py37', 'lib', 'python3.7', 'os']. Keep in mind that this is a
- # heuristic and there's now ay to "always" do it right.
- return sorted(potential_solutions, key=lambda p: len(p))[0], is_package
|