resolver.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. from __future__ import annotations
  2. import contextlib
  3. import functools
  4. import logging
  5. import os
  6. from typing import TYPE_CHECKING, cast
  7. from pip._vendor.packaging.utils import canonicalize_name
  8. from pip._vendor.resolvelib import BaseReporter, ResolutionImpossible, ResolutionTooDeep
  9. from pip._vendor.resolvelib import Resolver as RLResolver
  10. from pip._vendor.resolvelib.structs import DirectedGraph
  11. from pip._internal.cache import WheelCache
  12. from pip._internal.exceptions import ResolutionTooDeepError
  13. from pip._internal.index.package_finder import PackageFinder
  14. from pip._internal.operations.prepare import RequirementPreparer
  15. from pip._internal.req.constructors import install_req_extend_extras
  16. from pip._internal.req.req_install import InstallRequirement
  17. from pip._internal.req.req_set import RequirementSet
  18. from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider
  19. from pip._internal.resolution.resolvelib.provider import PipProvider
  20. from pip._internal.resolution.resolvelib.reporter import (
  21. PipDebuggingReporter,
  22. PipReporter,
  23. )
  24. from pip._internal.utils.packaging import get_requirement
  25. from .base import Candidate, Requirement
  26. from .factory import Factory
  27. if TYPE_CHECKING:
  28. from pip._vendor.resolvelib.resolvers import Result as RLResult
  29. Result = RLResult[Requirement, Candidate, str]
  30. logger = logging.getLogger(__name__)
  31. class Resolver(BaseResolver):
  32. _allowed_strategies = {"eager", "only-if-needed", "to-satisfy-only"}
  33. def __init__(
  34. self,
  35. preparer: RequirementPreparer,
  36. finder: PackageFinder,
  37. wheel_cache: WheelCache | None,
  38. make_install_req: InstallRequirementProvider,
  39. use_user_site: bool,
  40. ignore_dependencies: bool,
  41. ignore_installed: bool,
  42. ignore_requires_python: bool,
  43. force_reinstall: bool,
  44. upgrade_strategy: str,
  45. py_version_info: tuple[int, ...] | None = None,
  46. ):
  47. super().__init__()
  48. assert upgrade_strategy in self._allowed_strategies
  49. self.factory = Factory(
  50. finder=finder,
  51. preparer=preparer,
  52. make_install_req=make_install_req,
  53. wheel_cache=wheel_cache,
  54. use_user_site=use_user_site,
  55. force_reinstall=force_reinstall,
  56. ignore_installed=ignore_installed,
  57. ignore_requires_python=ignore_requires_python,
  58. py_version_info=py_version_info,
  59. )
  60. self.ignore_dependencies = ignore_dependencies
  61. self.upgrade_strategy = upgrade_strategy
  62. self._result: Result | None = None
  63. def resolve(
  64. self, root_reqs: list[InstallRequirement], check_supported_wheels: bool
  65. ) -> RequirementSet:
  66. collected = self.factory.collect_root_requirements(root_reqs)
  67. provider = PipProvider(
  68. factory=self.factory,
  69. constraints=collected.constraints,
  70. ignore_dependencies=self.ignore_dependencies,
  71. upgrade_strategy=self.upgrade_strategy,
  72. user_requested=collected.user_requested,
  73. )
  74. if "PIP_RESOLVER_DEBUG" in os.environ:
  75. reporter: BaseReporter[Requirement, Candidate, str] = PipDebuggingReporter()
  76. else:
  77. reporter = PipReporter(constraints=provider.constraints)
  78. resolver: RLResolver[Requirement, Candidate, str] = RLResolver(
  79. provider,
  80. reporter,
  81. )
  82. try:
  83. limit_how_complex_resolution_can_be = 200000
  84. result = self._result = resolver.resolve(
  85. collected.requirements, max_rounds=limit_how_complex_resolution_can_be
  86. )
  87. except ResolutionImpossible as e:
  88. error = self.factory.get_installation_error(
  89. cast("ResolutionImpossible[Requirement, Candidate]", e),
  90. collected.constraints,
  91. )
  92. raise error from e
  93. except ResolutionTooDeep:
  94. raise ResolutionTooDeepError from None
  95. req_set = RequirementSet(check_supported_wheels=check_supported_wheels)
  96. # process candidates with extras last to ensure their base equivalent is
  97. # already in the req_set if appropriate.
  98. # Python's sort is stable so using a binary key function keeps relative order
  99. # within both subsets.
  100. for candidate in sorted(
  101. result.mapping.values(), key=lambda c: c.name != c.project_name
  102. ):
  103. ireq = candidate.get_install_requirement()
  104. if ireq is None:
  105. if candidate.name != candidate.project_name:
  106. # extend existing req's extras
  107. with contextlib.suppress(KeyError):
  108. req = req_set.get_requirement(candidate.project_name)
  109. req_set.add_named_requirement(
  110. install_req_extend_extras(
  111. req, get_requirement(candidate.name).extras
  112. )
  113. )
  114. continue
  115. # Check if there is already an installation under the same name,
  116. # and set a flag for later stages to uninstall it, if needed.
  117. installed_dist = self.factory.get_dist_to_uninstall(candidate)
  118. if installed_dist is None:
  119. # There is no existing installation -- nothing to uninstall.
  120. ireq.should_reinstall = False
  121. elif self.factory.force_reinstall:
  122. # The --force-reinstall flag is set -- reinstall.
  123. ireq.should_reinstall = True
  124. elif installed_dist.version != candidate.version:
  125. # The installation is different in version -- reinstall.
  126. ireq.should_reinstall = True
  127. elif candidate.is_editable or installed_dist.editable:
  128. # The incoming distribution is editable, or different in
  129. # editable-ness to installation -- reinstall.
  130. ireq.should_reinstall = True
  131. elif candidate.source_link and candidate.source_link.is_file:
  132. # The incoming distribution is under file://
  133. if candidate.source_link.is_wheel:
  134. # is a local wheel -- do nothing.
  135. logger.info(
  136. "%s is already installed with the same version as the "
  137. "provided wheel. Use --force-reinstall to force an "
  138. "installation of the wheel.",
  139. ireq.name,
  140. )
  141. continue
  142. # is a local sdist or path -- reinstall
  143. ireq.should_reinstall = True
  144. else:
  145. continue
  146. link = candidate.source_link
  147. if link and link.is_yanked:
  148. # The reason can contain non-ASCII characters, Unicode
  149. # is required for Python 2.
  150. msg = (
  151. "The candidate selected for download or install is a "
  152. "yanked version: {name!r} candidate (version {version} "
  153. "at {link})\nReason for being yanked: {reason}"
  154. ).format(
  155. name=candidate.name,
  156. version=candidate.version,
  157. link=link,
  158. reason=link.yanked_reason or "<none given>",
  159. )
  160. logger.warning(msg)
  161. req_set.add_named_requirement(ireq)
  162. return req_set
  163. def get_installation_order(
  164. self, req_set: RequirementSet
  165. ) -> list[InstallRequirement]:
  166. """Get order for installation of requirements in RequirementSet.
  167. The returned list contains a requirement before another that depends on
  168. it. This helps ensure that the environment is kept consistent as they
  169. get installed one-by-one.
  170. The current implementation creates a topological ordering of the
  171. dependency graph, giving more weight to packages with less
  172. or no dependencies, while breaking any cycles in the graph at
  173. arbitrary points. We make no guarantees about where the cycle
  174. would be broken, other than it *would* be broken.
  175. """
  176. assert self._result is not None, "must call resolve() first"
  177. if not req_set.requirements:
  178. # Nothing is left to install, so we do not need an order.
  179. return []
  180. graph = self._result.graph
  181. weights = get_topological_weights(graph, set(req_set.requirements.keys()))
  182. sorted_items = sorted(
  183. req_set.requirements.items(),
  184. key=functools.partial(_req_set_item_sorter, weights=weights),
  185. reverse=True,
  186. )
  187. return [ireq for _, ireq in sorted_items]
  188. def get_topological_weights(
  189. graph: DirectedGraph[str | None], requirement_keys: set[str]
  190. ) -> dict[str | None, int]:
  191. """Assign weights to each node based on how "deep" they are.
  192. This implementation may change at any point in the future without prior
  193. notice.
  194. We first simplify the dependency graph by pruning any leaves and giving them
  195. the highest weight: a package without any dependencies should be installed
  196. first. This is done again and again in the same way, giving ever less weight
  197. to the newly found leaves. The loop stops when no leaves are left: all
  198. remaining packages have at least one dependency left in the graph.
  199. Then we continue with the remaining graph, by taking the length for the
  200. longest path to any node from root, ignoring any paths that contain a single
  201. node twice (i.e. cycles). This is done through a depth-first search through
  202. the graph, while keeping track of the path to the node.
  203. Cycles in the graph result would result in node being revisited while also
  204. being on its own path. In this case, take no action. This helps ensure we
  205. don't get stuck in a cycle.
  206. When assigning weight, the longer path (i.e. larger length) is preferred.
  207. We are only interested in the weights of packages that are in the
  208. requirement_keys.
  209. """
  210. path: set[str | None] = set()
  211. weights: dict[str | None, list[int]] = {}
  212. def visit(node: str | None) -> None:
  213. if node in path:
  214. # We hit a cycle, so we'll break it here.
  215. return
  216. # The walk is exponential and for pathologically connected graphs (which
  217. # are the ones most likely to contain cycles in the first place) it can
  218. # take until the heat-death of the universe. To counter this we limit
  219. # the number of attempts to visit (i.e. traverse through) any given
  220. # node. We choose a value here which gives decent enough coverage for
  221. # fairly well behaved graphs, and still limits the walk complexity to be
  222. # linear in nature.
  223. cur_weights = weights.get(node, [])
  224. if len(cur_weights) >= 5:
  225. return
  226. # Time to visit the children!
  227. path.add(node)
  228. for child in graph.iter_children(node):
  229. visit(child)
  230. path.remove(node)
  231. if node not in requirement_keys:
  232. return
  233. cur_weights.append(len(path))
  234. weights[node] = cur_weights
  235. # Simplify the graph, pruning leaves that have no dependencies. This is
  236. # needed for large graphs (say over 200 packages) because the `visit`
  237. # function is slower for large/densely connected graphs, taking minutes.
  238. # See https://github.com/pypa/pip/issues/10557
  239. # We repeat the pruning step until we have no more leaves to remove.
  240. while True:
  241. leaves = set()
  242. for key in graph:
  243. if key is None:
  244. continue
  245. for _child in graph.iter_children(key):
  246. # This means we have at least one child
  247. break
  248. else:
  249. # No child.
  250. leaves.add(key)
  251. if not leaves:
  252. # We are done simplifying.
  253. break
  254. # Calculate the weight for the leaves.
  255. weight = len(graph) - 1
  256. for leaf in leaves:
  257. if leaf not in requirement_keys:
  258. continue
  259. weights[leaf] = [weight]
  260. # Remove the leaves from the graph, making it simpler.
  261. for leaf in leaves:
  262. graph.remove(leaf)
  263. # Visit the remaining graph, this will only have nodes to handle if the
  264. # graph had a cycle in it, which the pruning step above could not handle.
  265. # `None` is guaranteed to be the root node by resolvelib.
  266. visit(None)
  267. # Sanity check: all requirement keys should be in the weights,
  268. # and no other keys should be in the weights.
  269. difference = set(weights.keys()).difference(requirement_keys)
  270. assert not difference, difference
  271. # Now give back all the weights, choosing the largest ones from what we
  272. # accumulated.
  273. return {node: max(wgts) for (node, wgts) in weights.items()}
  274. def _req_set_item_sorter(
  275. item: tuple[str, InstallRequirement],
  276. weights: dict[str | None, int],
  277. ) -> tuple[int, str]:
  278. """Key function used to sort install requirements for installation.
  279. Based on the "weight" mapping calculated in ``get_installation_order()``.
  280. The canonical package name is returned as the second member as a tie-
  281. breaker to ensure the result is predictable, which is useful in tests.
  282. """
  283. name = canonicalize_name(item[0])
  284. return weights[name], name