package_finder.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125
  1. """Routines related to PyPI, indexes"""
  2. from __future__ import annotations
  3. import datetime
  4. import enum
  5. import functools
  6. import itertools
  7. import logging
  8. import re
  9. from collections.abc import Iterable
  10. from dataclasses import dataclass
  11. from typing import (
  12. TYPE_CHECKING,
  13. Optional,
  14. Union,
  15. )
  16. from pip._vendor.packaging import specifiers
  17. from pip._vendor.packaging.tags import Tag
  18. from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
  19. from pip._vendor.packaging.version import InvalidVersion, Version, _BaseVersion
  20. from pip._vendor.packaging.version import parse as parse_version
  21. from pip._internal.exceptions import (
  22. BestVersionAlreadyInstalled,
  23. DistributionNotFound,
  24. InstallationError,
  25. InvalidWheelFilename,
  26. UnsupportedWheel,
  27. )
  28. from pip._internal.index.collector import LinkCollector, parse_links
  29. from pip._internal.metadata import select_backend
  30. from pip._internal.models.candidate import InstallationCandidate
  31. from pip._internal.models.format_control import FormatControl
  32. from pip._internal.models.link import Link
  33. from pip._internal.models.release_control import ReleaseControl
  34. from pip._internal.models.search_scope import SearchScope
  35. from pip._internal.models.selection_prefs import SelectionPreferences
  36. from pip._internal.models.target_python import TargetPython
  37. from pip._internal.models.wheel import Wheel
  38. from pip._internal.req import InstallRequirement
  39. from pip._internal.utils._log import getLogger
  40. from pip._internal.utils.filetypes import WHEEL_EXTENSION
  41. from pip._internal.utils.hashes import Hashes
  42. from pip._internal.utils.logging import indent_log
  43. from pip._internal.utils.misc import build_netloc
  44. from pip._internal.utils.packaging import check_requires_python
  45. from pip._internal.utils.unpacking import SUPPORTED_EXTENSIONS
  if TYPE_CHECKING:
      # Only needed by static type checkers; never imported at runtime.
      from typing_extensions import TypeGuard

  # Names exported by ``from ... import *``.
  __all__ = ["FormatControl", "BestCandidateResult", "PackageFinder"]

  logger = getLogger(__name__)

  # Either the empty tuple or an (int, str) pair.
  BuildTag = Union[tuple[()], tuple[int, str]]
  # Shape of the tuple returned by CandidateEvaluator._sort_key().
  CandidateSortingKey = tuple[int, int, int, _BaseVersion, Optional[int], BuildTag]
  def _check_link_requires_python(
      link: Link,
      version_info: tuple[int, int, int],
      ignore_requires_python: bool = False,
  ) -> bool:
      """
      Tell whether a link may be used with the given Python version, judged
      by the link's "Requires-Python" value.

      A "Requires-Python" value that cannot be parsed is logged and the link
      is reported as usable.

      :param link: The link whose ``requires_python`` value is checked.
      :param version_info: A 3-tuple of ints representing the Python
          major-minor-micro version to check.
      :param ignore_requires_python: When true, a failed check is only
          logged and the link is still reported as usable.
      :return: False only when the check fails and is not being ignored;
          True in every other case.
      """
      try:
          compatible = check_requires_python(
              link.requires_python,
              version_info=version_info,
          )
      except specifiers.InvalidSpecifier:
          logger.debug(
              "Ignoring invalid Requires-Python (%r) for link: %s",
              link.requires_python,
              link,
          )
          return True

      if compatible:
          return True

      dotted_version = ".".join(str(part) for part in version_info)
      if ignore_requires_python:
          # The mismatch is tolerated on request, but still recorded.
          logger.debug(
              "Ignoring failed Requires-Python check (%s not in: %r) for link: %s",
              dotted_version,
              link.requires_python,
              link,
          )
          return True

      logger.verbose(
          "Link requires a different Python (%s not in: %r): %s",
          dotted_version,
          link.requires_python,
          link,
      )
      return False
  class LinkType(enum.Enum):
      """Outcome categories produced when a link is evaluated."""

      # The link is usable as an installation candidate.
      candidate = enum.auto()

      # Reasons a link is rejected.
      different_project = enum.auto()
      yanked = enum.auto()
      format_unsupported = enum.auto()
      format_invalid = enum.auto()
      platform_mismatch = enum.auto()
      requires_python_mismatch = enum.auto()
      upload_too_late = enum.auto()
      upload_time_missing = enum.auto()
  class LinkEvaluator:
      """
      Decides, for one particular project, whether a link is a candidate
      for installation.
      """

      # Matches a trailing "-py<major>[.][<minor digit>]" marker in a version.
      _py_version_re = re.compile(r"-py([123]\.?[0-9]?)$")

      # allow_yanked deliberately has no default: every call site must state
      # explicitly whether yanked releases are acceptable, which also makes
      # that decision visible to anyone reading the calling code.
      def __init__(
          self,
          project_name: str,
          canonical_name: NormalizedName,
          formats: frozenset[str],
          target_python: TargetPython,
          allow_yanked: bool,
          ignore_requires_python: bool | None = None,
          uploaded_prior_to: datetime.datetime | None = None,
      ) -> None:
          """
          :param project_name: The user supplied package name.
          :param canonical_name: The canonical package name.
          :param formats: The formats allowed for this package. Should be a
              set with 'binary' or 'source' or both in it.
          :param target_python: The target Python interpreter to use when
              evaluating link compatibility (wheel tags, the Python version
              embedded in a filename or egg fragment, and the link's
              "Requires-Python" value).
          :param allow_yanked: Whether files marked as yanked (in the sense
              of PEP 592) are permitted to be candidates for install.
          :param ignore_requires_python: Whether to ignore incompatible
              "Requires-Python" values. None is treated as False.
          :param uploaded_prior_to: If set, only allow links uploaded prior
              to the given datetime.
          """
          self.project_name = project_name
          self._canonical_name = canonical_name
          self._formats = formats
          self._target_python = target_python
          self._allow_yanked = allow_yanked
          self._ignore_requires_python = (
              False if ignore_requires_python is None else ignore_requires_python
          )
          self._uploaded_prior_to = uploaded_prior_to

      def evaluate_link(self, link: Link) -> tuple[LinkType, str]:
          """
          Determine whether a link is a candidate for installation.

          :return: A tuple (result, detail). *result* is a LinkType member;
              when it is ``LinkType.candidate``, *detail* is the candidate's
              version string, otherwise *detail* is the reason the link
              fails to qualify.
          """
          if link.is_yanked and not self._allow_yanked:
              yank_reason = link.yanked_reason or "<none given>"
              return (LinkType.yanked, f"yanked for reason: {yank_reason}")

          version = None
          if link.egg_fragment:
              egg_info, ext = link.egg_fragment, link.ext
          else:
              egg_info, ext = link.splitext()
              if not ext:
                  return (LinkType.format_unsupported, "not a file")
              if ext not in SUPPORTED_EXTENSIONS:
                  return (
                      LinkType.format_unsupported,
                      f"unsupported archive format: {ext}",
                  )

              has_wheel_ext = ext == WHEEL_EXTENSION
              if has_wheel_ext and "binary" not in self._formats:
                  return (
                      LinkType.format_unsupported,
                      f"No binaries permitted for {self.project_name}",
                  )
              if "macosx10" in link.path and ext == ".zip":
                  return (LinkType.format_unsupported, "macosx10 one")

              if has_wheel_ext:
                  try:
                      wheel = Wheel(link.filename)
                  except InvalidWheelFilename:
                      return (
                          LinkType.format_invalid,
                          "invalid wheel filename",
                      )
                  if wheel.name != self._canonical_name:
                      return (
                          LinkType.different_project,
                          f"wrong project name (not {self.project_name})",
                      )

                  if not wheel.supported(self._target_python.get_unsorted_tags()):
                      # Listing the wheel's own tags in the reason makes
                      # compatibility problems easier to troubleshoot.
                      file_tags = ", ".join(wheel.get_formatted_file_tags())
                      return (
                          LinkType.platform_mismatch,
                          f"none of the wheel's tags ({file_tags}) are compatible "
                          f"(run pip debug --verbose to show compatible tags)",
                      )

                  version = wheel.version

          # The upload-time filter runs only once the link is known to be a
          # package file. Local files are exempt, as --uploaded-prior-to only
          # applies to packages from indexes.
          cutoff = self._uploaded_prior_to
          if cutoff is not None and not link.is_file:
              if link.upload_time is None:
                  index_info = (
                      f"Index {link.comes_from}" if link.comes_from else "Index"
                  )
                  return (
                      LinkType.upload_time_missing,
                      f"{index_info} does not provide upload-time metadata.",
                  )
              if link.upload_time >= cutoff:
                  return (
                      LinkType.upload_too_late,
                      f"Upload time {link.upload_time} not "
                      f"prior to {self._uploaded_prior_to}",
                  )

          # This should be up by the self.ok_binary check, but see issue 2700.
          if ext != WHEEL_EXTENSION and "source" not in self._formats:
              return (
                  LinkType.format_unsupported,
                  f"No sources permitted for {self.project_name}",
              )

          if not version:
              version = _extract_version_from_fragment(
                  egg_info,
                  self._canonical_name,
              )
          if not version:
              return (
                  LinkType.format_invalid,
                  f"Missing project version for {self.project_name}",
              )

          py_marker = self._py_version_re.search(version)
          if py_marker:
              # Drop the "-pyX.Y" suffix from the version before comparing it.
              version = version[: py_marker.start()]
              if py_marker.group(1) != self._target_python.py_version:
                  return (
                      LinkType.platform_mismatch,
                      "Python version is incorrect",
                  )

          python_ok = _check_link_requires_python(
              link,
              version_info=self._target_python.py_version_info,
              ignore_requires_python=self._ignore_requires_python,
          )
          if not python_ok:
              requires_python = link.requires_python
              if requires_python:

                  def _numeric_parts(spec: str) -> tuple[int, ...]:
                      return tuple(
                          int(piece) for piece in spec.split(".") if piece.isdigit()
                      )

                  # Re-render the specifier set in a stable order.
                  individual_specs = (
                      str(spec) for spec in specifiers.SpecifierSet(requires_python)
                  )
                  requires_python = ",".join(
                      sorted(individual_specs, key=_numeric_parts)
                  )
              return (
                  LinkType.requires_python_mismatch,
                  f"{version} Requires-Python {requires_python}",
              )

          logger.debug("Found link %s, version: %s", link, version)
          return (LinkType.candidate, version)
  def filter_unallowed_hashes(
      candidates: list[InstallationCandidate],
      hashes: Hashes | None,
      project_name: str,
  ) -> list[InstallationCandidate]:
      """
      Return a new list of candidates with disallowed hashes filtered out.

      When at least one candidate has an allowed hash, the result holds
      every candidate that either has an allowed hash or carries no hash at
      all. When nothing matches (or no hashes are given), a copy of the full
      input list is returned.

      Keeping the hash-less candidates alongside a match allows a warning to
      be logged if a more preferred candidate has no hash specified.
      Returning everything when nothing matches lets pip report the hash of
      the candidate that would otherwise have been installed (e.g. so the
      user can update their requirements file with the desired hash).

      :param candidates: The candidates to filter; never returned as-is.
      :param hashes: The allowed hashes, or a falsy value for "no filtering".
      :param project_name: Used only in the debug log messages.
      """
      if not hashes:
          logger.debug(
              "Given no hashes to check %s links for project %r: "
              "discarding no candidates",
              len(candidates),
              project_name,
          )
          # Always hand back a fresh list, never the caller's own object.
          return list(candidates)

      kept = []
      # Rejected candidates are collected purely for the log message.
      rejected = []
      hash_matches = 0
      for cand in candidates:
          cand_link = cand.link
          if cand_link.has_hash:
              if not cand_link.is_hash_allowed(hashes=hashes):
                  rejected.append(cand)
                  continue
              hash_matches += 1
          kept.append(cand)

      # Always hand back a fresh list, never the caller's own object.
      filtered = kept if hash_matches else list(candidates)

      if len(filtered) != len(candidates):
          discard_message = "discarding {} non-matches:\n {}".format(
              len(rejected),
              "\n ".join(str(cand.link) for cand in rejected),
          )
      else:
          discard_message = "discarding no candidates"

      logger.debug(
          "Checked %s links for project %r against %s hashes "
          "(%s matches, %s no digest): %s",
          len(candidates),
          project_name,
          hashes.digest_count,
          hash_matches,
          len(kept) - hash_matches,
          discard_message,
      )
      return filtered
  @dataclass
  class CandidatePreferences:
      """
      Bundle of preferences used when filtering and sorting
      InstallationCandidate objects.
      """

      # Whether binary candidates should be favoured (off by default).
      prefer_binary: bool = False
      # Optional release control object; None when not configured.
      release_control: ReleaseControl | None = None
  @dataclass(frozen=True)
  class BestCandidateResult:
      """The candidates returned by `PackageFinder.find_best_candidate`.

      Instances are only meant to be created by CandidateEvaluator's
      `compute_best_candidate()` method.

      :param all_candidates: A sequence of all available candidates found.
      :param applicable_candidates: The applicable candidates.
      :param best_candidate: The most preferred candidate found, or None
          if no applicable candidates were found.
      """

      all_candidates: list[InstallationCandidate]
      applicable_candidates: list[InstallationCandidate]
      best_candidate: InstallationCandidate | None

      def __post_init__(self) -> None:
          """Check that the three fields are mutually consistent."""
          applicable = self.applicable_candidates
          assert set(applicable).issubset(set(self.all_candidates))

          best = self.best_candidate
          if best is not None:
              assert best in applicable
          else:
              assert not applicable
  class CandidateEvaluator:
      """
      Filters and orders installation candidates according to which tags
      are valid.
      """

      @classmethod
      def create(
          cls,
          project_name: str,
          target_python: TargetPython | None = None,
          prefer_binary: bool = False,
          release_control: ReleaseControl | None = None,
          specifier: specifiers.BaseSpecifier | None = None,
          hashes: Hashes | None = None,
      ) -> CandidateEvaluator:
          """Create a CandidateEvaluator object.

          :param target_python: The target Python interpreter to use when
              checking compatibility. If None (the default), a TargetPython
              object is constructed with no arguments.
          :param specifier: An optional object implementing `filter`
              (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable
              versions. If None, an empty SpecifierSet is used.
          :param hashes: An optional collection of allowed hashes.
          """
          python = TargetPython() if target_python is None else target_python
          version_filter = specifiers.SpecifierSet() if specifier is None else specifier

          return cls(
              project_name=project_name,
              supported_tags=python.get_sorted_tags(),
              specifier=version_filter,
              prefer_binary=prefer_binary,
              release_control=release_control,
              hashes=hashes,
          )

      def __init__(
          self,
          project_name: str,
          supported_tags: list[Tag],
          specifier: specifiers.BaseSpecifier,
          prefer_binary: bool = False,
          release_control: ReleaseControl | None = None,
          hashes: Hashes | None = None,
      ) -> None:
          """
          :param supported_tags: The PEP 425 tags supported by the target
              Python in order of preference (most preferred first).
          """
          self._project_name = project_name
          self._supported_tags = supported_tags
          self._specifier = specifier
          self._prefer_binary = prefer_binary
          self._release_control = release_control
          self._hashes = hashes
          # A tag's position in supported_tags is its priority, so build the
          # tag -> position map once up front; it is passed to
          # wheel.find_most_preferred_tag when sorting.
          self._wheel_tag_preferences = {
              tag: position for position, tag in enumerate(supported_tags)
          }

      def get_applicable_candidates(
          self,
          candidates: list[InstallationCandidate],
      ) -> list[InstallationCandidate]:
          """
          Return the candidates that satisfy the specifier and the allowed
          hashes, sorted by ``_sort_key``.
          """
          release_control = self._release_control
          if release_control is None:
              # None lets the specifier itself decide about pre-releases.
              allow_prereleases = None
          else:
              allow_prereleases = release_control.allows_prereleases(
                  canonicalize_name(self._project_name)
              )

          # With the pkg_resources backend the version objects are turned
          # into str: when pip is debundled but setuptools isn't, Python sees
          # packaging.version.Version and
          # pkg_resources._vendor.packaging.version.Version as different
          # types, so str serves as the common interchange format. Once the
          # pkg_resources-provided specifier is no longer used, the cast to
          # str() can be dropped.
          stringify = select_backend().NAME == "pkg_resources"
          paired: list[tuple[InstallationCandidate, str | Version]] = [
              (cand, str(cand.version) if stringify else cand.version)
              for cand in candidates
          ]

          accepted_versions = set(
              self._specifier.filter(
                  (version for _, version in paired),
                  prereleases=allow_prereleases,
              )
          )
          applicable = [cand for cand, version in paired if version in accepted_versions]

          hash_filtered = filter_unallowed_hashes(
              candidates=applicable,
              hashes=self._hashes,
              project_name=self._project_name,
          )
          return sorted(hash_filtered, key=self._sort_key)

      def _sort_key(self, candidate: InstallationCandidate) -> CandidateSortingKey:
          """
          Key function for sorting InstallationCandidates by preference.

          Tuples that compare greater under Python's default ordering are
          more preferred. The components, most significant first:

          1. Whether the link's hash is allowed: a candidate with an allowed
             hash always wins, e.g. when the only candidate with an allowed
             hash is yanked we still want to use it.
          2. Yank status: yanked candidates (PEP 592) rank below others.
          3. Binary preference: 1 for wheels when prefer_binary is set, so
             that all wheels sort above sources; otherwise 0.
          4. The candidate's version.
          5. Tag priority: for wheels, the negated result of
             ``find_most_preferred_tag``; for sdists, the negated number of
             supported tags.
          6. The wheel's build tag (empty tuple for sdists).

          Note: embedding this logic into the Link comparison operators was
          considered, but then different sdist links with the same version
          would have to be considered equal.

          :raises UnsupportedWheel: if ``find_most_preferred_tag`` raises
              ValueError for a wheel.
          """
          link = candidate.link
          tags = self._supported_tags
          binary_preference = 0
          build_tag: BuildTag = ()

          if not link.is_wheel:
              # sdist
              pri = -len(tags)
          else:
              # can raise InvalidWheelFilename
              wheel = Wheel(link.filename)
              try:
                  pri = -wheel.find_most_preferred_tag(
                      tags, self._wheel_tag_preferences
                  )
              except ValueError:
                  raise UnsupportedWheel(
                      f"{wheel.filename} is not a supported wheel for this platform. It "
                      "can't be sorted."
                  )
              if self._prefer_binary:
                  binary_preference = 1
              build_tag = wheel.build_tag

          return (
              int(link.is_hash_allowed(self._hashes)),
              -int(link.is_yanked),  # -1 for yanked.
              binary_preference,
              candidate.version,
              pri,
              build_tag,
          )

      def sort_best_candidate(
          self,
          candidates: list[InstallationCandidate],
      ) -> InstallationCandidate | None:
          """
          Return the candidate with the greatest ``_sort_key``, or None when
          *candidates* is empty.
          """
          if candidates:
              return max(candidates, key=self._sort_key)
          return None

      def compute_best_candidate(
          self,
          candidates: list[InstallationCandidate],
      ) -> BestCandidateResult:
          """
          Compute and return a `BestCandidateResult` instance.
          """
          applicable = self.get_applicable_candidates(candidates)
          return BestCandidateResult(
              candidates,
              applicable_candidates=applicable,
              best_candidate=self.sort_best_candidate(applicable),
          )
  541. class PackageFinder:
  542. """This finds packages.
  543. This is meant to match easy_install's technique for looking for
  544. packages, by reading pages and looking for appropriate links.
  545. """
  546. def __init__(
  547. self,
  548. link_collector: LinkCollector,
  549. target_python: TargetPython,
  550. allow_yanked: bool,
  551. format_control: FormatControl | None = None,
  552. candidate_prefs: CandidatePreferences | None = None,
  553. ignore_requires_python: bool | None = None,
  554. uploaded_prior_to: datetime.datetime | None = None,
  555. ) -> None:
  556. """
  557. This constructor is primarily meant to be used by the create() class
  558. method and from tests.
  559. :param format_control: A FormatControl object, used to control
  560. the selection of source packages / binary packages when consulting
  561. the index and links.
  562. :param candidate_prefs: Options to use when creating a
  563. CandidateEvaluator object.
  564. """
  565. if candidate_prefs is None:
  566. candidate_prefs = CandidatePreferences()
  567. format_control = format_control or FormatControl(set(), set())
  568. self._allow_yanked = allow_yanked
  569. self._candidate_prefs = candidate_prefs
  570. self._ignore_requires_python = ignore_requires_python
  571. self._link_collector = link_collector
  572. self._target_python = target_python
  573. self._uploaded_prior_to = uploaded_prior_to
  574. self.format_control = format_control
  575. # These are boring links that have already been logged somehow.
  576. self._logged_links: set[tuple[Link, LinkType, str]] = set()
  577. # Cache of the result of finding candidates
  578. self._all_candidates: dict[str, list[InstallationCandidate]] = {}
  579. self._best_candidates: dict[
  580. tuple[str, specifiers.BaseSpecifier | None, Hashes | None],
  581. BestCandidateResult,
  582. ] = {}
  583. # Don't include an allow_yanked default value to make sure each call
  584. # site considers whether yanked releases are allowed. This also causes
  585. # that decision to be made explicit in the calling code, which helps
  586. # people when reading the code.
  @classmethod
  def create(
      cls,
      link_collector: LinkCollector,
      selection_prefs: SelectionPreferences,
      target_python: TargetPython | None = None,
      uploaded_prior_to: datetime.datetime | None = None,
  ) -> PackageFinder:
      """Create a PackageFinder.

      :param link_collector: Passed through to the constructor unchanged.
      :param selection_prefs: The candidate selection preferences, as a
          SelectionPreferences object.
      :param target_python: The target Python interpreter to use when
          checking compatibility. If None (the default), a TargetPython
          object is constructed with no arguments.
      :param uploaded_prior_to: If set, only find links uploaded prior
          to the given datetime.
      """
      python = TargetPython() if target_python is None else target_python

      prefs = CandidatePreferences(
          prefer_binary=selection_prefs.prefer_binary,
          release_control=selection_prefs.release_control,
      )

      return cls(
          link_collector=link_collector,
          target_python=python,
          allow_yanked=selection_prefs.allow_yanked,
          format_control=selection_prefs.format_control,
          candidate_prefs=prefs,
          ignore_requires_python=selection_prefs.ignore_requires_python,
          uploaded_prior_to=uploaded_prior_to,
      )
  619. @property
  620. def target_python(self) -> TargetPython:
  621. return self._target_python
  622. @property
  623. def search_scope(self) -> SearchScope:
  624. return self._link_collector.search_scope
  625. @search_scope.setter
  626. def search_scope(self, search_scope: SearchScope) -> None:
  627. self._link_collector.search_scope = search_scope
  628. @property
  629. def find_links(self) -> list[str]:
  630. return self._link_collector.find_links
  631. @property
  632. def index_urls(self) -> list[str]:
  633. return self.search_scope.index_urls
  634. @property
  635. def proxy(self) -> str | None:
  636. return self._link_collector.session.pip_proxy
  @property
  def trusted_hosts(self) -> Iterable[str]:
      """
      Lazily yield one ``build_netloc`` result per entry of the session's
      ``pip_trusted_origins``.
      """
      yield from (
          build_netloc(*origin)
          for origin in self._link_collector.session.pip_trusted_origins
      )
  641. @property
  642. def custom_cert(self) -> str | None:
  643. # session.verify is either a boolean (use default bundle/no SSL
  644. # verification) or a string path to a custom CA bundle to use. We only
  645. # care about the latter.
  646. verify = self._link_collector.session.verify
  647. return verify if isinstance(verify, str) else None
  648. @property
  649. def client_cert(self) -> str | None:
  650. cert = self._link_collector.session.cert
  651. assert not isinstance(cert, tuple), "pip only supports PEM client certs"
  652. return cert
  653. @property
  654. def release_control(self) -> ReleaseControl | None:
  655. return self._candidate_prefs.release_control
  656. def set_release_control(self, release_control: ReleaseControl) -> None:
  657. self._candidate_prefs.release_control = release_control
  658. @property
  659. def prefer_binary(self) -> bool:
  660. return self._candidate_prefs.prefer_binary
  661. def set_prefer_binary(self) -> None:
  662. self._candidate_prefs.prefer_binary = True
  663. @property
  664. def uploaded_prior_to(self) -> datetime.datetime | None:
  665. return self._uploaded_prior_to
  def requires_python_skipped_reasons(self) -> list[str]:
      """
      Return the distinct detail strings of all logged links that were
      skipped for a Requires-Python mismatch, in sorted order.
      """
      unique_details = set()
      for _, result, detail in self._logged_links:
          if result == LinkType.requires_python_mismatch:
              unique_details.add(detail)
      return sorted(unique_details)
  def make_link_evaluator(self, project_name: str) -> LinkEvaluator:
      """
      Build a LinkEvaluator for *project_name* from this finder's settings.

      The allowed formats are looked up in ``format_control`` under the
      canonicalized project name.
      """
      canonical = canonicalize_name(project_name)
      allowed_formats = self.format_control.get_allowed_formats(canonical)
      evaluator = LinkEvaluator(
          project_name=project_name,
          canonical_name=canonical,
          formats=allowed_formats,
          target_python=self._target_python,
          allow_yanked=self._allow_yanked,
          ignore_requires_python=self._ignore_requires_python,
          uploaded_prior_to=self._uploaded_prior_to,
      )
      return evaluator
  685. def _sort_links(self, links: Iterable[Link]) -> list[Link]:
  686. """
  687. Returns elements of links in order, non-egg links first, egg links
  688. second, while eliminating duplicates
  689. """
  690. eggs, no_eggs = [], []
  691. seen: set[Link] = set()
  692. for link in links:
  693. if link not in seen:
  694. seen.add(link)
  695. if link.egg_fragment:
  696. eggs.append(link)
  697. else:
  698. no_eggs.append(link)
  699. return no_eggs + eggs
  700. def _log_skipped_link(self, link: Link, result: LinkType, detail: str) -> None:
  701. entry = (link, result, detail)
  702. if entry not in self._logged_links:
  703. # Put the link at the end so the reason is more visible and because
  704. # the link string is usually very long.
  705. logger.debug("Skipping link: %s: %s", detail, link)
  706. self._logged_links.add(entry)
  def get_install_candidate(
      self, link_evaluator: LinkEvaluator, link: Link
  ) -> InstallationCandidate | None:
      """
      Evaluate the link and, when it is a candidate for install, wrap it in
      an InstallationCandidate. Return None for skipped links and for
      candidates whose version raises InvalidVersion.

      :raises InstallationError: if the evaluation result is
          ``LinkType.upload_time_missing``.
      """
      outcome, info = link_evaluator.evaluate_link(link)

      if outcome == LinkType.upload_time_missing:
          # Fail immediately if the index doesn't provide upload-time
          # when --uploaded-prior-to is specified
          raise InstallationError(info)

      if outcome != LinkType.candidate:
          self._log_skipped_link(link, outcome, info)
          return None

      # For a candidate result, the second element is used as the version.
      try:
          candidate = InstallationCandidate(
              name=link_evaluator.project_name,
              link=link,
              version=info,
          )
      except InvalidVersion:
          return None
      else:
          return candidate
  def evaluate_links(
      self, link_evaluator: LinkEvaluator, links: Iterable[Link]
  ) -> list[InstallationCandidate]:
      """
      Run every link (in ``_sort_links`` order) through
      ``get_install_candidate`` and return the non-None results.
      """
      evaluated = (
          self.get_install_candidate(link_evaluator, ordered_link)
          for ordered_link in self._sort_links(links)
      )
      return [found for found in evaluated if found is not None]
  def process_project_url(
      self, project_url: Link, link_evaluator: LinkEvaluator
  ) -> list[InstallationCandidate]:
      """Fetch the page at *project_url* and evaluate the links parsed from it.

      :return: The candidates produced by ``evaluate_links``, or an empty
          list when the link collector returns no response.
      """
      logger.debug(
          "Fetching project page and analyzing links: %s",
          project_url,
      )
      response = self._link_collector.fetch_response(project_url)
      if response is None:
          return []

      # All links are parsed before evaluation starts.
      links_on_page = list(parse_links(response))

      with indent_log():
          return self.evaluate_links(link_evaluator, links=links_on_page)
  def find_all_candidates(self, project_name: str) -> list[InstallationCandidate]:
      """Find all available InstallationCandidate for project_name

      This checks index_urls and find_links. Results are cached per project
      name in ``self._all_candidates``.

      See LinkEvaluator.evaluate_link() for details on which files
      are accepted.

      :return: Candidates from file links followed by candidates from pages.
      """
      if project_name in self._all_candidates:
          return self._all_candidates[project_name]

      evaluator = self.make_link_evaluator(project_name)
      page_handler = functools.partial(
          self.process_project_url,
          link_evaluator=evaluator,
      )
      collected_sources = self._link_collector.collect_sources(
          project_name=project_name,
          candidates_from_page=page_handler,
      )

      def usable_sources():
          # Walk every group of sources, skipping entries that are None.
          for group in collected_sources:
              for source in group:
                  if source is not None:
                      yield source

      # Page candidates are gathered in full before any file link is read.
      page_candidates = [
          candidate
          for source in usable_sources()
          for candidate in source.page_candidates()
      ]

      file_links = [
          file_link for source in usable_sources() for file_link in source.file_links()
      ]
      file_links.sort(reverse=True)
      file_candidates = self.evaluate_links(evaluator, file_links)

      if logger.isEnabledFor(logging.DEBUG) and file_candidates:
          locations = []
          for candidate in file_candidates:
              found_link = candidate.link
              assert found_link.url  # we need to have a URL
              try:
                  location = found_link.file_path
              except Exception:
                  location = found_link.url  # it's not a local file
              locations.append(location)
          logger.debug("Local files found: %s", ", ".join(locations))

      # This is an intentional priority ordering
      combined = file_candidates + page_candidates
      self._all_candidates[project_name] = combined
      return combined
  def make_candidate_evaluator(
      self,
      project_name: str,
      specifier: specifiers.BaseSpecifier | None = None,
      hashes: Hashes | None = None,
  ) -> CandidateEvaluator:
      """Create a CandidateEvaluator from this finder's target Python and
      candidate preferences, plus the given specifier and hashes.
      """
      prefs = self._candidate_prefs
      evaluator = CandidateEvaluator.create(
          project_name=project_name,
          target_python=self._target_python,
          prefer_binary=prefs.prefer_binary,
          release_control=prefs.release_control,
          specifier=specifier,
          hashes=hashes,
      )
      return evaluator
  def find_best_candidate(
      self,
      project_name: str,
      specifier: specifiers.BaseSpecifier | None = None,
      hashes: Hashes | None = None,
  ) -> BestCandidateResult:
      """Find matches for the given project and specifier.

      Results are cached in ``self._best_candidates`` under the
      ``(project_name, specifier, hashes)`` key.

      :param specifier: An optional object implementing `filter`
          (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable
          versions.

      :return: A `BestCandidateResult` instance.
      """
      cache_key = (project_name, specifier, hashes)
      if cache_key in self._best_candidates:
          return self._best_candidates[cache_key]

      all_candidates = self.find_all_candidates(project_name)
      evaluator = self.make_candidate_evaluator(
          project_name=project_name,
          specifier=specifier,
          hashes=hashes,
      )
      best_result = evaluator.compute_best_candidate(all_candidates)
      self._best_candidates[cache_key] = best_result
      return self._best_candidates[cache_key]
  def find_requirement(
      self, req: InstallRequirement, upgrade: bool
  ) -> InstallationCandidate | None:
      """Try to find a Link matching req

      :param req: An InstallRequirement; its name must not be None.
      :param upgrade: A boolean. When false and a version is already
          installed, nothing is selected for installation.
      :return: The best InstallationCandidate when it should be installed,
          or None when *upgrade* is false and a version is already
          installed.
      :raises DistributionNotFound: if nothing is installed and no best
          candidate was found.
      :raises BestVersionAlreadyInstalled: if a version is installed and the
          best candidate (if any) is not newer than it.
      """
      name = req.name
      assert name is not None, "find_requirement() called with no name"

      hashes = req.hashes(trust_internet=False)
      search_result = self.find_best_candidate(
          name,
          specifier=req.specifier,
          hashes=hashes,
      )
      best_candidate = search_result.best_candidate

      installed_version: _BaseVersion | None = (
          req.satisfied_by.version if req.satisfied_by is not None else None
      )

      def _format_versions(cand_iter: Iterable[InstallationCandidate]) -> str:
          # This repeated parse_version and str() conversion is needed to
          # handle different vendoring sources from pip and pkg_resources.
          # If we stop using the pkg_resources provided specifier and start
          # using our own, we can drop the cast to str().
          unique_versions = {str(c.version) for c in cand_iter}
          ordered_versions = sorted(unique_versions, key=parse_version)
          return ", ".join(ordered_versions) or "none"

      if installed_version is None and best_candidate is None:
          # Check if only final releases are allowed for this package
          release_control = self.release_control
          finals_only = (
              release_control is not None
              and release_control.allows_prereleases(canonicalize_name(name)) is False
          )
          version_type = "final version" if finals_only else "version"

          logger.critical(
              "Could not find a %s that satisfies the requirement %s "
              "(from versions: %s)",
              version_type,
              req,
              _format_versions(search_result.all_candidates),
          )

          raise DistributionNotFound(f"No matching distribution found for {req}")

      def _should_install_candidate(
          candidate: InstallationCandidate | None,
      ) -> TypeGuard[InstallationCandidate]:
          # The decision is made from the enclosing best_candidate and
          # installed_version; the argument is not read.
          return installed_version is None or (
              best_candidate is not None
              and best_candidate.version > installed_version
          )

      keep_installed = installed_version is not None and not upgrade
      if keep_installed:
          if _should_install_candidate(best_candidate):
              logger.debug(
                  "Existing installed version (%s) satisfies requirement "
                  "(most up-to-date version is %s)",
                  installed_version,
                  best_candidate.version,
              )
          else:
              logger.debug(
                  "Existing installed version (%s) is most up-to-date and "
                  "satisfies requirement",
                  installed_version,
              )
          return None

      if _should_install_candidate(best_candidate):
          logger.debug(
              "Using version %s (newest of versions: %s)",
              best_candidate.version,
              _format_versions(search_result.applicable_candidates),
          )
          return best_candidate

      # We have an existing version, and it's the best version
      logger.debug(
          "Installed version (%s) is most up-to-date (past versions: %s)",
          installed_version,
          _format_versions(search_result.applicable_candidates),
      )
      raise BestVersionAlreadyInstalled
  def _find_name_version_sep(fragment: str, canonical_name: str) -> int:
      """Find the separator's index based on the package's canonical name.

      :param fragment: A <package>+<version> filename "fragment" (stem) or
          egg fragment.
      :param canonical_name: The package's canonical name.
      :raises ValueError: if no dash in the fragment is preceded by text that
          canonicalizes to *canonical_name*.

      This function is needed since the canonicalized name does not necessarily
      have the same length as the egg info's name part. An example::

      >>> fragment = 'foo__bar-1.0'
      >>> canonical_name = 'foo-bar'
      >>> _find_name_version_sep(fragment, canonical_name)
      8
      """
      # Project name and version must be separated by one single dash. Try
      # each dash from left to right; the first one whose prefix canonicalizes
      # to the expected name is the separator.
      dash_positions = (pos for pos, char in enumerate(fragment) if char == "-")
      for pos in dash_positions:
          if canonicalize_name(fragment[:pos]) == canonical_name:
              return pos
      raise ValueError(f"{fragment} does not match {canonical_name}")
  def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str | None:
      """Parse the version string from a <package>+<version> filename
      "fragment" (stem) or egg fragment.

      :param fragment: The string to parse. E.g. foo-2.1
      :param canonical_name: The canonicalized name of the package this
          belongs to.
      :return: The text after the name/version separator, or None when no
          separator is found or that text is empty.
      """
      try:
          separator = _find_name_version_sep(fragment, canonical_name)
      except ValueError:
          return None
      # An empty remainder counts as "no version".
      return fragment[separator + 1 :] or None
  965. version = fragment[version_start:]
  966. if not version:
  967. return None
  968. return version