cache.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. """HTTP cache implementation."""
  2. from __future__ import annotations
  3. import os
  4. import shutil
  5. from collections.abc import Generator
  6. from contextlib import contextmanager
  7. from datetime import datetime
  8. from typing import Any, BinaryIO, Callable
  9. from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache
  10. from pip._vendor.cachecontrol.caches import SeparateBodyFileCache
  11. from pip._vendor.requests.models import Response
  12. from pip._internal.utils.filesystem import (
  13. adjacent_tmp_file,
  14. copy_directory_permissions,
  15. replace,
  16. )
  17. from pip._internal.utils.misc import ensure_dir
  18. def is_from_cache(response: Response) -> bool:
  19. return getattr(response, "from_cache", False)
  20. @contextmanager
  21. def suppressed_cache_errors() -> Generator[None, None, None]:
  22. """If we can't access the cache then we can just skip caching and process
  23. requests as if caching wasn't enabled.
  24. """
  25. try:
  26. yield
  27. except OSError:
  28. pass
  29. class SafeFileCache(SeparateBodyBaseCache):
  30. """
  31. A file based cache which is safe to use even when the target directory may
  32. not be accessible or writable.
  33. There is a race condition when two processes try to write and/or read the
  34. same entry at the same time, since each entry consists of two separate
  35. files (https://github.com/psf/cachecontrol/issues/324). We therefore have
  36. additional logic that makes sure that both files to be present before
  37. returning an entry; this fixes the read side of the race condition.
  38. For the write side, we assume that the server will only ever return the
  39. same data for the same URL, which ought to be the case for files pip is
  40. downloading. PyPI does not have a mechanism to swap out a wheel for
  41. another wheel, for example. If this assumption is not true, the
  42. CacheControl issue will need to be fixed.
  43. """
  44. def __init__(self, directory: str) -> None:
  45. assert directory is not None, "Cache directory must not be None."
  46. super().__init__()
  47. self.directory = directory
  48. def _get_cache_path(self, name: str) -> str:
  49. # From cachecontrol.caches.file_cache.FileCache._fn, brought into our
  50. # class for backwards-compatibility and to avoid using a non-public
  51. # method.
  52. hashed = SeparateBodyFileCache.encode(name)
  53. parts = list(hashed[:5]) + [hashed]
  54. return os.path.join(self.directory, *parts)
  55. def get(self, key: str) -> bytes | None:
  56. # The cache entry is only valid if both metadata and body exist.
  57. metadata_path = self._get_cache_path(key)
  58. body_path = metadata_path + ".body"
  59. if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
  60. return None
  61. with suppressed_cache_errors():
  62. with open(metadata_path, "rb") as f:
  63. return f.read()
  64. def _write_to_file(self, path: str, writer_func: Callable[[BinaryIO], Any]) -> None:
  65. """Common file writing logic with proper permissions and atomic replacement."""
  66. with suppressed_cache_errors():
  67. ensure_dir(os.path.dirname(path))
  68. with adjacent_tmp_file(path) as f:
  69. writer_func(f)
  70. # Inherit the read/write permissions of the cache directory
  71. # to enable multi-user cache use-cases.
  72. copy_directory_permissions(self.directory, f)
  73. replace(f.name, path)
  74. def _write(self, path: str, data: bytes) -> None:
  75. self._write_to_file(path, lambda f: f.write(data))
  76. def _write_from_io(self, path: str, source_file: BinaryIO) -> None:
  77. self._write_to_file(path, lambda f: shutil.copyfileobj(source_file, f))
  78. def set(
  79. self, key: str, value: bytes, expires: int | datetime | None = None
  80. ) -> None:
  81. path = self._get_cache_path(key)
  82. self._write(path, value)
  83. def delete(self, key: str) -> None:
  84. path = self._get_cache_path(key)
  85. with suppressed_cache_errors():
  86. os.remove(path)
  87. with suppressed_cache_errors():
  88. os.remove(path + ".body")
  89. def get_body(self, key: str) -> BinaryIO | None:
  90. # The cache entry is only valid if both metadata and body exist.
  91. metadata_path = self._get_cache_path(key)
  92. body_path = metadata_path + ".body"
  93. if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
  94. return None
  95. with suppressed_cache_errors():
  96. return open(body_path, "rb")
  97. def set_body(self, key: str, body: bytes) -> None:
  98. path = self._get_cache_path(key) + ".body"
  99. self._write(path, body)
  100. def set_body_from_io(self, key: str, body_file: BinaryIO) -> None:
  101. """Set the body of the cache entry from a file object."""
  102. path = self._get_cache_path(key) + ".body"
  103. self._write_from_io(path, body_file)