# session.py
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. from __future__ import annotations
  5. import email.utils
  6. import functools
  7. import io
  8. import ipaddress
  9. import json
  10. import logging
  11. import mimetypes
  12. import os
  13. import platform
  14. import shutil
  15. import subprocess
  16. import sys
  17. import urllib.parse
  18. import warnings
  19. from collections.abc import Generator, Mapping, Sequence
  20. from typing import (
  21. TYPE_CHECKING,
  22. Any,
  23. Optional,
  24. Union,
  25. )
  26. from pip._vendor import requests, urllib3
  27. from pip._vendor.cachecontrol import CacheControlAdapter as _BaseCacheControlAdapter
  28. from pip._vendor.requests.adapters import DEFAULT_POOLBLOCK, BaseAdapter
  29. from pip._vendor.requests.adapters import HTTPAdapter as _BaseHTTPAdapter
  30. from pip._vendor.requests.models import PreparedRequest, Response
  31. from pip._vendor.requests.structures import CaseInsensitiveDict
  32. from pip._vendor.urllib3.connectionpool import ConnectionPool
  33. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  34. from pip import __version__
  35. from pip._internal.metadata import get_default_environment
  36. from pip._internal.models.link import Link
  37. from pip._internal.network.auth import MultiDomainBasicAuth
  38. from pip._internal.network.cache import SafeFileCache
  39. # Import ssl from compat so the initial import occurs in only one place.
  40. from pip._internal.utils.compat import has_tls
  41. from pip._internal.utils.glibc import libc_ver
  42. from pip._internal.utils.misc import build_url_from_netloc, parse_netloc
  43. from pip._internal.utils.urls import url_to_path
  44. if TYPE_CHECKING:
  45. from ssl import SSLContext
  46. from pip._vendor.urllib3 import ProxyManager
  47. from pip._vendor.urllib3.poolmanager import PoolManager
logger = logging.getLogger(__name__)

# One secure-origin pattern, laid out as (protocol, hostname, port).
# In the entries below "*" acts as a wildcard, the hostname slot may also
# hold a CIDR network, and the port slot may be None.
SecureOrigin = tuple[str, str, Optional[Union[int, str]]]


# Ignore warning raised when using --trusted-host.
# This filter is installed as a module-level side effect at import time.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)


# Built-in origins that PipSession.iter_secure_origins() yields before any
# per-session trusted hosts.
SECURE_ORIGINS: list[SecureOrigin] = [
    # protocol, hostname, port
    # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
    ("https", "*", "*"),
    ("*", "localhost", "*"),
    ("*", "127.0.0.0/8", "*"),
    ("*", "::1/128", "*"),
    ("file", "*", None),
    # ssh is always secure.
    ("ssh", "*", "*"),
]


# These are environment variables present when running under various
# CI systems.  For each variable, some CI systems that use the variable
# are indicated.  The collection was chosen so that for each of a number
# of popular systems, at least one of the environment variables is used.
# This list is used to provide some indication of and lower bound for
# CI traffic to PyPI.  Thus, it is okay if the list is not comprehensive.
# For more background, see: https://github.com/pypa/pip/issues/5499
CI_ENVIRONMENT_VARIABLES = (
    # Azure Pipelines
    "BUILD_BUILDID",
    # Jenkins
    "BUILD_ID",
    # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
    "CI",
    # Explicit environment variable.
    "PIP_IS_CI",
)
  80. def looks_like_ci() -> bool:
  81. """
  82. Return whether it looks like pip is running under CI.
  83. """
  84. # We don't use the method of checking for a tty (e.g. using isatty())
  85. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  86. # method doesn't provide definitive information in either direction.
  87. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  88. @functools.lru_cache(maxsize=1)
  89. def user_agent() -> str:
  90. """
  91. Return a string representing the user agent.
  92. """
  93. data: dict[str, Any] = {
  94. "installer": {"name": "pip", "version": __version__},
  95. "python": platform.python_version(),
  96. "implementation": {
  97. "name": platform.python_implementation(),
  98. },
  99. }
  100. if data["implementation"]["name"] == "CPython":
  101. data["implementation"]["version"] = platform.python_version()
  102. elif data["implementation"]["name"] == "PyPy":
  103. pypy_version_info = sys.pypy_version_info # type: ignore
  104. if pypy_version_info.releaselevel == "final":
  105. pypy_version_info = pypy_version_info[:3]
  106. data["implementation"]["version"] = ".".join(
  107. [str(x) for x in pypy_version_info]
  108. )
  109. elif data["implementation"]["name"] == "Jython":
  110. # Complete Guess
  111. data["implementation"]["version"] = platform.python_version()
  112. elif data["implementation"]["name"] == "IronPython":
  113. # Complete Guess
  114. data["implementation"]["version"] = platform.python_version()
  115. if sys.platform.startswith("linux"):
  116. from pip._vendor import distro
  117. linux_distribution = distro.name(), distro.version(), distro.codename()
  118. distro_infos: dict[str, Any] = dict(
  119. filter(
  120. lambda x: x[1],
  121. zip(["name", "version", "id"], linux_distribution),
  122. )
  123. )
  124. libc = dict(
  125. filter(
  126. lambda x: x[1],
  127. zip(["lib", "version"], libc_ver()),
  128. )
  129. )
  130. if libc:
  131. distro_infos["libc"] = libc
  132. if distro_infos:
  133. data["distro"] = distro_infos
  134. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  135. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  136. if platform.system():
  137. data.setdefault("system", {})["name"] = platform.system()
  138. if platform.release():
  139. data.setdefault("system", {})["release"] = platform.release()
  140. if platform.machine():
  141. data["cpu"] = platform.machine()
  142. if has_tls():
  143. import _ssl as ssl
  144. data["openssl_version"] = ssl.OPENSSL_VERSION
  145. setuptools_dist = get_default_environment().get_distribution("setuptools")
  146. if setuptools_dist is not None:
  147. data["setuptools_version"] = str(setuptools_dist.version)
  148. if shutil.which("rustc") is not None:
  149. # If for any reason `rustc --version` fails, silently ignore it
  150. try:
  151. rustc_output = subprocess.check_output(
  152. ["rustc", "--version"], stderr=subprocess.STDOUT, timeout=0.5
  153. )
  154. except Exception:
  155. pass
  156. else:
  157. if rustc_output.startswith(b"rustc "):
  158. # The format of `rustc --version` is:
  159. # `b'rustc 1.52.1 (9bc8c42bb 2021-05-09)\n'`
  160. # We extract just the middle (1.52.1) part
  161. data["rustc_version"] = rustc_output.split(b" ")[1].decode()
  162. # Use None rather than False so as not to give the impression that
  163. # pip knows it is not being run under CI. Rather, it is a null or
  164. # inconclusive result. Also, we include some value rather than no
  165. # value to make it easier to know that the check has been run.
  166. data["ci"] = True if looks_like_ci() else None
  167. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  168. if user_data is not None:
  169. data["user_data"] = user_data
  170. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  171. data=data,
  172. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  173. )
  174. class LocalFSAdapter(BaseAdapter):
  175. def send(
  176. self,
  177. request: PreparedRequest,
  178. stream: bool = False,
  179. timeout: float | tuple[float, float] | tuple[float, None] | None = None,
  180. verify: bool | str = True,
  181. cert: bytes | str | tuple[bytes | str, bytes | str] | None = None,
  182. proxies: Mapping[str, str] | None = None,
  183. ) -> Response:
  184. assert request.url is not None
  185. pathname = url_to_path(request.url)
  186. resp = Response()
  187. resp.status_code = 200
  188. resp.url = request.url
  189. try:
  190. stats = os.stat(pathname)
  191. except OSError as exc:
  192. # format the exception raised as a io.BytesIO object,
  193. # to return a better error message:
  194. resp.status_code = 404
  195. resp.reason = type(exc).__name__
  196. resp.raw = io.BytesIO(f"{resp.reason}: {exc}".encode())
  197. else:
  198. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  199. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  200. resp.headers = CaseInsensitiveDict(
  201. {
  202. "Content-Type": content_type,
  203. "Content-Length": str(stats.st_size),
  204. "Last-Modified": modified,
  205. }
  206. )
  207. resp.raw = open(pathname, "rb")
  208. resp.close = resp.raw.close # type: ignore[method-assign]
  209. return resp
  210. def close(self) -> None:
  211. pass
  212. class _SSLContextAdapterMixin:
  213. """Mixin to add the ``ssl_context`` constructor argument to HTTP adapters.
  214. The additional argument is forwarded directly to the pool manager. This allows us
  215. to dynamically decide what SSL store to use at runtime, which is used to implement
  216. the optional ``truststore`` backend.
  217. """
  218. def __init__(
  219. self,
  220. *,
  221. ssl_context: SSLContext | None = None,
  222. **kwargs: Any,
  223. ) -> None:
  224. self._ssl_context = ssl_context
  225. super().__init__(**kwargs)
  226. def init_poolmanager(
  227. self,
  228. connections: int,
  229. maxsize: int,
  230. block: bool = DEFAULT_POOLBLOCK,
  231. **pool_kwargs: Any,
  232. ) -> PoolManager:
  233. if self._ssl_context is not None:
  234. pool_kwargs.setdefault("ssl_context", self._ssl_context)
  235. return super().init_poolmanager( # type: ignore[misc, no-any-return]
  236. connections=connections,
  237. maxsize=maxsize,
  238. block=block,
  239. **pool_kwargs,
  240. )
  241. def proxy_manager_for(self, proxy: str, **proxy_kwargs: Any) -> ProxyManager:
  242. # Proxy manager replaces the pool manager, so inject our SSL
  243. # context here too. https://github.com/pypa/pip/issues/13288
  244. if self._ssl_context is not None:
  245. proxy_kwargs.setdefault("ssl_context", self._ssl_context)
  246. return super().proxy_manager_for(proxy, **proxy_kwargs) # type: ignore[misc, no-any-return]
  247. class HTTPAdapter(_SSLContextAdapterMixin, _BaseHTTPAdapter):
  248. pass
  249. class CacheControlAdapter(_SSLContextAdapterMixin, _BaseCacheControlAdapter):
  250. pass
  251. class InsecureHTTPAdapter(HTTPAdapter):
  252. def cert_verify(
  253. self,
  254. conn: ConnectionPool,
  255. url: str,
  256. verify: bool | str,
  257. cert: str | tuple[str, str] | None,
  258. ) -> None:
  259. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  260. class InsecureCacheControlAdapter(CacheControlAdapter):
  261. def cert_verify(
  262. self,
  263. conn: ConnectionPool,
  264. url: str,
  265. verify: bool | str,
  266. cert: str | tuple[str, str] | None,
  267. ) -> None:
  268. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  269. class PipSession(requests.Session):
  270. timeout: int | None = None
  271. def __init__(
  272. self,
  273. *args: Any,
  274. retries: int = 0,
  275. resume_retries: int = 0,
  276. cache: str | None = None,
  277. trusted_hosts: Sequence[str] = (),
  278. index_urls: list[str] | None = None,
  279. ssl_context: SSLContext | None = None,
  280. **kwargs: Any,
  281. ) -> None:
  282. """
  283. :param trusted_hosts: Domains not to emit warnings for when not using
  284. HTTPS.
  285. """
  286. super().__init__(*args, **kwargs)
  287. # Namespace the attribute with "pip_" just in case to prevent
  288. # possible conflicts with the base class.
  289. self.pip_trusted_origins: list[tuple[str, int | None]] = []
  290. self.pip_proxy = None
  291. # Attach our User Agent to the request
  292. self.headers["User-Agent"] = user_agent()
  293. # Attach our Authentication handler to the session
  294. self.auth: MultiDomainBasicAuth = MultiDomainBasicAuth(index_urls=index_urls)
  295. # Create our urllib3.Retry instance which will allow us to customize
  296. # how we handle retries.
  297. retries = urllib3.Retry(
  298. # Set the total number of retries that a particular request can
  299. # have.
  300. total=retries,
  301. # A 503 error from PyPI typically means that the Fastly -> Origin
  302. # connection got interrupted in some way. A 503 error in general
  303. # is typically considered a transient error so we'll go ahead and
  304. # retry it.
  305. # A 500 may indicate transient error in Amazon S3
  306. # A 502 may be a transient error from a CDN like CloudFlare or CloudFront
  307. # A 520 or 527 - may indicate transient error in CloudFlare
  308. status_forcelist=[500, 502, 503, 520, 527],
  309. # Add a small amount of back off between failed requests in
  310. # order to prevent hammering the service.
  311. backoff_factor=0.25,
  312. ) # type: ignore
  313. self.resume_retries = resume_retries
  314. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  315. # support caching so we'll use it for all http:// URLs.
  316. # If caching is disabled, we will also use it for
  317. # https:// hosts that we've marked as ignoring
  318. # TLS errors for (trusted-hosts).
  319. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  320. # We want to _only_ cache responses on securely fetched origins or when
  321. # the host is specified as trusted. We do this because
  322. # we can't validate the response of an insecurely/untrusted fetched
  323. # origin, and we don't want someone to be able to poison the cache and
  324. # require manual eviction from the cache to fix it.
  325. self._trusted_host_adapter: InsecureCacheControlAdapter | InsecureHTTPAdapter
  326. if cache:
  327. secure_adapter: _BaseHTTPAdapter = CacheControlAdapter(
  328. cache=SafeFileCache(cache),
  329. max_retries=retries,
  330. ssl_context=ssl_context,
  331. )
  332. self._trusted_host_adapter = InsecureCacheControlAdapter(
  333. cache=SafeFileCache(cache),
  334. max_retries=retries,
  335. )
  336. else:
  337. secure_adapter = HTTPAdapter(max_retries=retries, ssl_context=ssl_context)
  338. self._trusted_host_adapter = insecure_adapter
  339. self.mount("https://", secure_adapter)
  340. self.mount("http://", insecure_adapter)
  341. # Enable file:// urls
  342. self.mount("file://", LocalFSAdapter())
  343. for host in trusted_hosts:
  344. self.add_trusted_host(host, suppress_logging=True)
  345. def update_index_urls(self, new_index_urls: list[str]) -> None:
  346. """
  347. :param new_index_urls: New index urls to update the authentication
  348. handler with.
  349. """
  350. self.auth.index_urls = new_index_urls
  351. def add_trusted_host(
  352. self, host: str, source: str | None = None, suppress_logging: bool = False
  353. ) -> None:
  354. """
  355. :param host: It is okay to provide a host that has previously been
  356. added.
  357. :param source: An optional source string, for logging where the host
  358. string came from.
  359. """
  360. if not suppress_logging:
  361. msg = f"adding trusted host: {host!r}"
  362. if source is not None:
  363. msg += f" (from {source})"
  364. logger.info(msg)
  365. parsed_host, parsed_port = parse_netloc(host)
  366. if parsed_host is None:
  367. raise ValueError(f"Trusted host URL must include a host part: {host!r}")
  368. if (parsed_host, parsed_port) not in self.pip_trusted_origins:
  369. self.pip_trusted_origins.append((parsed_host, parsed_port))
  370. self.mount(
  371. build_url_from_netloc(host, scheme="http") + "/", self._trusted_host_adapter
  372. )
  373. self.mount(build_url_from_netloc(host) + "/", self._trusted_host_adapter)
  374. if not parsed_port:
  375. self.mount(
  376. build_url_from_netloc(host, scheme="http") + ":",
  377. self._trusted_host_adapter,
  378. )
  379. # Mount wildcard ports for the same host.
  380. self.mount(build_url_from_netloc(host) + ":", self._trusted_host_adapter)
  381. def iter_secure_origins(self) -> Generator[SecureOrigin, None, None]:
  382. yield from SECURE_ORIGINS
  383. for host, port in self.pip_trusted_origins:
  384. yield ("*", host, "*" if port is None else port)
  385. def is_secure_origin(self, location: Link) -> bool:
  386. # Determine if this url used a secure transport mechanism
  387. parsed = urllib.parse.urlparse(str(location))
  388. origin_protocol, origin_host, origin_port = (
  389. parsed.scheme,
  390. parsed.hostname,
  391. parsed.port,
  392. )
  393. # The protocol to use to see if the protocol matches.
  394. # Don't count the repository type as part of the protocol: in
  395. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  396. # the last scheme.)
  397. origin_protocol = origin_protocol.rsplit("+", 1)[-1]
  398. # Determine if our origin is a secure origin by looking through our
  399. # hardcoded list of secure origins, as well as any additional ones
  400. # configured on this PackageFinder instance.
  401. for secure_origin in self.iter_secure_origins():
  402. secure_protocol, secure_host, secure_port = secure_origin
  403. if origin_protocol != secure_protocol and secure_protocol != "*":
  404. continue
  405. try:
  406. addr = ipaddress.ip_address(origin_host or "")
  407. network = ipaddress.ip_network(secure_host)
  408. except ValueError:
  409. # We don't have both a valid address or a valid network, so
  410. # we'll check this origin against hostnames.
  411. if (
  412. origin_host
  413. and origin_host.lower() != secure_host.lower()
  414. and secure_host != "*"
  415. ):
  416. continue
  417. else:
  418. # We have a valid address and network, so see if the address
  419. # is contained within the network.
  420. if addr not in network:
  421. continue
  422. # Check to see if the port matches.
  423. if (
  424. origin_port != secure_port
  425. and secure_port != "*"
  426. and secure_port is not None
  427. ):
  428. continue
  429. # If we've gotten here, then this origin matches the current
  430. # secure origin and we should return True
  431. return True
  432. # If we've gotten to this point, then the origin isn't secure and we
  433. # will not accept it as a valid location to search. We will however
  434. # log a warning that we are ignoring it.
  435. logger.warning(
  436. "The repository located at %s is not a trusted or secure host and "
  437. "is being ignored. If this repository is available via HTTPS we "
  438. "recommend you use HTTPS instead, otherwise you may silence "
  439. "this warning and allow it anyway with '--trusted-host %s'.",
  440. origin_host,
  441. origin_host,
  442. )
  443. return False
  444. def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> Response: # type: ignore[override]
  445. # Allow setting a default timeout on a session
  446. kwargs.setdefault("timeout", self.timeout)
  447. # Allow setting a default proxies on a session
  448. kwargs.setdefault("proxies", self.proxies)
  449. # Dispatch the actual request
  450. return super().request(method, url, *args, **kwargs)