sys_path.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import os
  2. import re
  3. from pathlib import Path
  4. from importlib.machinery import all_suffixes
  5. from jedi.inference.cache import inference_state_method_cache
  6. from jedi.inference.base_value import ContextualizedNode
  7. from jedi.inference.helpers import is_string, get_str_or_none
  8. from jedi.parser_utils import get_cached_code_lines
  9. from jedi.file_io import FileIO
  10. from jedi import settings
  11. from jedi import debug
  12. _BUILDOUT_PATH_INSERTION_LIMIT = 10
  13. def _abs_path(module_context, str_path: str):
  14. path = Path(str_path)
  15. if path.is_absolute():
  16. return path
  17. module_path = module_context.py__file__()
  18. if module_path is None:
  19. # In this case we have no idea where we actually are in the file
  20. # system.
  21. return None
  22. base_dir = module_path.parent
  23. return base_dir.joinpath(path).absolute()
  24. def _paths_from_assignment(module_context, expr_stmt):
  25. """
  26. Extracts the assigned strings from an assignment that looks as follows::
  27. sys.path[0:0] = ['module/path', 'another/module/path']
  28. This function is in general pretty tolerant (and therefore 'buggy').
  29. However, it's not a big issue usually to add more paths to Jedi's sys_path,
  30. because it will only affect Jedi in very random situations and by adding
  31. more paths than necessary, it usually benefits the general user.
  32. """
  33. for assignee, operator in zip(expr_stmt.children[::2], expr_stmt.children[1::2]):
  34. try:
  35. assert operator in ['=', '+=']
  36. assert assignee.type in ('power', 'atom_expr') and \
  37. len(assignee.children) > 1
  38. c = assignee.children
  39. assert c[0].type == 'name' and c[0].value == 'sys'
  40. trailer = c[1]
  41. assert trailer.children[0] == '.' and trailer.children[1].value == 'path'
  42. # TODO Essentially we're not checking details on sys.path
  43. # manipulation. Both assigment of the sys.path and changing/adding
  44. # parts of the sys.path are the same: They get added to the end of
  45. # the current sys.path.
  46. """
  47. execution = c[2]
  48. assert execution.children[0] == '['
  49. subscript = execution.children[1]
  50. assert subscript.type == 'subscript'
  51. assert ':' in subscript.children
  52. """
  53. except AssertionError:
  54. continue
  55. cn = ContextualizedNode(module_context.create_context(expr_stmt), expr_stmt)
  56. for lazy_value in cn.infer().iterate(cn):
  57. for value in lazy_value.infer():
  58. if is_string(value):
  59. abs_path = _abs_path(module_context, value.get_safe_value())
  60. if abs_path is not None:
  61. yield abs_path
  62. def _paths_from_list_modifications(module_context, trailer1, trailer2):
  63. """ extract the path from either "sys.path.append" or "sys.path.insert" """
  64. # Guarantee that both are trailers, the first one a name and the second one
  65. # a function execution with at least one param.
  66. if not (trailer1.type == 'trailer' and trailer1.children[0] == '.'
  67. and trailer2.type == 'trailer' and trailer2.children[0] == '('
  68. and len(trailer2.children) == 3):
  69. return
  70. name = trailer1.children[1].value
  71. if name not in ['insert', 'append']:
  72. return
  73. arg = trailer2.children[1]
  74. if name == 'insert' and len(arg.children) in (3, 4): # Possible trailing comma.
  75. arg = arg.children[2]
  76. for value in module_context.create_context(arg).infer_node(arg):
  77. p = get_str_or_none(value)
  78. if p is None:
  79. continue
  80. abs_path = _abs_path(module_context, p)
  81. if abs_path is not None:
  82. yield abs_path
  83. @inference_state_method_cache(default=[])
  84. def check_sys_path_modifications(module_context):
  85. """
  86. Detect sys.path modifications within module.
  87. """
  88. def get_sys_path_powers(names):
  89. for name in names:
  90. power = name.parent.parent
  91. if power is not None and power.type in ('power', 'atom_expr'):
  92. c = power.children
  93. if c[0].type == 'name' and c[0].value == 'sys' \
  94. and c[1].type == 'trailer':
  95. n = c[1].children[1]
  96. if n.type == 'name' and n.value == 'path':
  97. yield name, power
  98. if module_context.tree_node is None:
  99. return []
  100. added = []
  101. try:
  102. possible_names = module_context.tree_node.get_used_names()['path']
  103. except KeyError:
  104. pass
  105. else:
  106. for name, power in get_sys_path_powers(possible_names):
  107. expr_stmt = power.parent
  108. if len(power.children) >= 4:
  109. added.extend(
  110. _paths_from_list_modifications(
  111. module_context, *power.children[2:4]
  112. )
  113. )
  114. elif expr_stmt is not None and expr_stmt.type == 'expr_stmt':
  115. added.extend(_paths_from_assignment(module_context, expr_stmt))
  116. return added
  117. def discover_buildout_paths(inference_state, script_path):
  118. buildout_script_paths = set()
  119. for buildout_script_path in _get_buildout_script_paths(script_path):
  120. for path in _get_paths_from_buildout_script(inference_state, buildout_script_path):
  121. buildout_script_paths.add(path)
  122. if len(buildout_script_paths) >= _BUILDOUT_PATH_INSERTION_LIMIT:
  123. break
  124. return buildout_script_paths
  125. def _get_paths_from_buildout_script(inference_state, buildout_script_path):
  126. file_io = FileIO(str(buildout_script_path))
  127. try:
  128. module_node = inference_state.parse(
  129. file_io=file_io,
  130. cache=True,
  131. cache_path=settings.cache_directory
  132. )
  133. except IOError:
  134. debug.warning('Error trying to read buildout_script: %s', buildout_script_path)
  135. return
  136. from jedi.inference.value import ModuleValue
  137. module_context = ModuleValue(
  138. inference_state, module_node,
  139. file_io=file_io,
  140. string_names=None,
  141. code_lines=get_cached_code_lines(inference_state.grammar, buildout_script_path),
  142. ).as_context()
  143. yield from check_sys_path_modifications(module_context)
  144. def _get_parent_dir_with_file(path: Path, filename):
  145. for parent in path.parents:
  146. try:
  147. if parent.joinpath(filename).is_file():
  148. return parent
  149. except OSError:
  150. continue
  151. return None
  152. def _get_buildout_script_paths(search_path: Path):
  153. """
  154. if there is a 'buildout.cfg' file in one of the parent directories of the
  155. given module it will return a list of all files in the buildout bin
  156. directory that look like python files.
  157. :param search_path: absolute path to the module.
  158. """
  159. project_root = _get_parent_dir_with_file(search_path, 'buildout.cfg')
  160. if not project_root:
  161. return
  162. bin_path = project_root.joinpath('bin')
  163. if not bin_path.exists():
  164. return
  165. for filename in os.listdir(bin_path):
  166. try:
  167. filepath = bin_path.joinpath(filename)
  168. with open(filepath, 'r') as f:
  169. firstline = f.readline()
  170. if firstline.startswith('#!') and 'python' in firstline:
  171. yield filepath
  172. except (UnicodeDecodeError, IOError) as e:
  173. # Probably a binary file; permission error or race cond. because
  174. # file got deleted. Ignore it.
  175. debug.warning(str(e))
  176. continue
  177. def remove_python_path_suffix(path):
  178. for suffix in all_suffixes() + ['.pyi']:
  179. if path.suffix == suffix:
  180. path = path.with_name(path.stem)
  181. break
  182. return path
  183. def transform_path_to_dotted(sys_path, module_path):
  184. """
  185. Returns the dotted path inside a sys.path as a list of names. e.g.
  186. >>> transform_path_to_dotted([str(Path("/foo").absolute())], Path('/foo/bar/baz.py').absolute())
  187. (('bar', 'baz'), False)
  188. Returns (None, False) if the path doesn't really resolve to anything.
  189. The second return part is if it is a package.
  190. """
  191. # First remove the suffix.
  192. module_path = remove_python_path_suffix(module_path)
  193. if module_path.name.startswith('.'):
  194. return None, False
  195. # Once the suffix was removed we are using the files as we know them. This
  196. # means that if someone uses an ending like .vim for a Python file, .vim
  197. # will be part of the returned dotted part.
  198. is_package = module_path.name == '__init__'
  199. if is_package:
  200. module_path = module_path.parent
  201. def iter_potential_solutions():
  202. for p in sys_path:
  203. if str(module_path).startswith(p):
  204. # Strip the trailing slash/backslash
  205. rest = str(module_path)[len(p):]
  206. # On Windows a path can also use a slash.
  207. if rest.startswith(os.path.sep) or rest.startswith('/'):
  208. # Remove a slash in cases it's still there.
  209. rest = rest[1:]
  210. if rest:
  211. split = rest.split(os.path.sep)
  212. if not all(split):
  213. # This means that part of the file path was empty, this
  214. # is very strange and is probably a file that is called
  215. # `.py`.
  216. return
  217. # Stub folders for foo can end with foo-stubs. Just remove
  218. # it.
  219. yield tuple(re.sub(r'-stubs$', '', s) for s in split)
  220. potential_solutions = tuple(iter_potential_solutions())
  221. if not potential_solutions:
  222. return None, False
  223. # Try to find the shortest path, this makes more sense usually, because the
  224. # user usually has venvs somewhere. This means that a path like
  225. # .tox/py37/lib/python3.7/os.py can be normal for a file. However in that
  226. # case we definitely want to return ['os'] as a path and not a crazy
  227. # ['.tox', 'py37', 'lib', 'python3.7', 'os']. Keep in mind that this is a
  228. # heuristic and there's now ay to "always" do it right.
  229. return sorted(potential_solutions, key=lambda p: len(p))[0], is_package