_cached_py_info.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. """Acquire Python information via subprocess interrogation with multi-level caching."""
  2. from __future__ import annotations
  3. import hashlib
  4. import json
  5. import logging
  6. import os
  7. import pkgutil
  8. import secrets
  9. import subprocess # noqa: S404
  10. import sys
  11. import tempfile
  12. from collections import OrderedDict
  13. from contextlib import contextmanager
  14. from pathlib import Path
  15. from shlex import quote
  16. from subprocess import Popen, TimeoutExpired # noqa: S404
  17. from typing import TYPE_CHECKING, Final
  18. from ._cache import NoOpCache
  19. from ._py_info import PythonInfo
  20. if TYPE_CHECKING:
  21. from collections.abc import Generator, Mapping
  22. from ._cache import ContentStore, PyInfoCache
# In-memory cache of interrogation outcomes keyed by executable path; a value is either the
# resulting PythonInfo or the Exception produced while trying to obtain it.
_CACHE: OrderedDict[Path, PythonInfo | Exception] = OrderedDict()
# Seed the cache with the running interpreter so a lookup for sys.executable can be answered
# from memory (unless the caller ignores the cache or the cache has been cleared).
_CACHE[Path(sys.executable)] = PythonInfo()
# Module-level logger used by every function in this file.
_LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
  26. def from_exe( # noqa: PLR0913
  27. cls: type[PythonInfo],
  28. cache: PyInfoCache | None,
  29. exe: str,
  30. env: Mapping[str, str] | None = None,
  31. *,
  32. raise_on_error: bool = True,
  33. ignore_cache: bool = False,
  34. ) -> PythonInfo | None:
  35. env = os.environ if env is None else env
  36. result = _get_from_cache(cls, cache, exe, env, ignore_cache=ignore_cache)
  37. if isinstance(result, Exception):
  38. if raise_on_error:
  39. raise result
  40. _LOGGER.info("%s", result)
  41. result = None
  42. return result
  43. def _get_from_cache(
  44. cls: type[PythonInfo],
  45. cache: PyInfoCache | None,
  46. exe: str,
  47. env: Mapping[str, str],
  48. *,
  49. ignore_cache: bool = True,
  50. ) -> PythonInfo | Exception:
  51. exe_path = Path(exe)
  52. if not ignore_cache and exe_path in _CACHE:
  53. result = _CACHE[exe_path]
  54. else:
  55. py_info = _get_via_file_cache(cls, cache, exe_path, exe, env)
  56. result = _CACHE[exe_path] = py_info
  57. if isinstance(result, PythonInfo):
  58. result.executable = exe
  59. return result
  60. def _get_via_file_cache(
  61. cls: type[PythonInfo],
  62. cache: PyInfoCache | None,
  63. path: Path,
  64. exe: str,
  65. env: Mapping[str, str],
  66. ) -> PythonInfo | Exception:
  67. path_text = str(path)
  68. try:
  69. path_modified = path.stat().st_mtime
  70. except OSError:
  71. path_modified = -1
  72. py_info_script = Path(Path(__file__).resolve()).parent / "_py_info.py"
  73. try:
  74. py_info_hash: str | None = hashlib.sha256(py_info_script.read_bytes()).hexdigest()
  75. except OSError:
  76. py_info_hash = None
  77. resolved_cache = cache if cache is not None else NoOpCache()
  78. py_info: PythonInfo | None = None
  79. py_info_store = resolved_cache.py_info(path)
  80. with py_info_store.locked():
  81. if py_info_store.exists() and (data := py_info_store.read()) is not None:
  82. of_path, of_st_mtime = data.get("path"), data.get("st_mtime")
  83. of_content, of_hash = data.get("content"), data.get("hash")
  84. if (
  85. of_path == path_text
  86. and of_st_mtime == path_modified
  87. and of_hash == py_info_hash
  88. and isinstance(of_content, dict)
  89. ):
  90. py_info = _load_cached_py_info(cls, py_info_store, of_content)
  91. else:
  92. py_info_store.remove()
  93. if py_info is None:
  94. failure, py_info = _run_subprocess(cls, exe, env)
  95. if failure is not None:
  96. _LOGGER.debug("first subprocess attempt failed for %s (%s), retrying", exe, failure)
  97. failure, py_info = _run_subprocess(cls, exe, env)
  98. if failure is not None:
  99. return failure
  100. if py_info is not None:
  101. py_info_store.write({
  102. "st_mtime": path_modified,
  103. "path": path_text,
  104. "content": py_info.to_dict(),
  105. "hash": py_info_hash,
  106. })
  107. if py_info is None:
  108. msg = f"{exe} failed to produce interpreter info"
  109. return RuntimeError(msg)
  110. return py_info
  111. def _load_cached_py_info(
  112. cls: type[PythonInfo],
  113. py_info_store: ContentStore,
  114. content: dict,
  115. ) -> PythonInfo | None:
  116. try:
  117. py_info = cls.from_dict(content.copy())
  118. except (KeyError, TypeError):
  119. py_info_store.remove()
  120. return None
  121. if (sys_exe := py_info.system_executable) is not None and not Path(sys_exe).exists():
  122. py_info_store.remove()
  123. return None
  124. return py_info
  125. COOKIE_LENGTH: Final[int] = 32
  126. def gen_cookie() -> str:
  127. return secrets.token_hex(COOKIE_LENGTH // 2)
  128. @contextmanager
  129. def _resolve_py_info_script() -> Generator[Path]:
  130. py_info_script = Path(Path(__file__).resolve()).parent / "_py_info.py"
  131. if py_info_script.is_file():
  132. yield py_info_script
  133. else:
  134. data = pkgutil.get_data(__package__ or __name__, "_py_info.py")
  135. if data is None:
  136. msg = "cannot locate _py_info.py for subprocess interrogation"
  137. raise FileNotFoundError(msg)
  138. fd, tmp = tempfile.mkstemp(suffix=".py")
  139. try:
  140. os.write(fd, data)
  141. os.close(fd)
  142. yield Path(tmp)
  143. finally:
  144. Path(tmp).unlink()
  145. def _extract_between_cookies(out: str, start_cookie: str, end_cookie: str) -> tuple[str, str, int, int]:
  146. """Extract payload between reversed cookie markers, forwarding any surrounding output to stdout."""
  147. raw_out = out
  148. out_starts = out.find(start_cookie[::-1])
  149. if out_starts > -1:
  150. if pre_cookie := out[:out_starts]:
  151. sys.stdout.write(pre_cookie)
  152. out = out[out_starts + COOKIE_LENGTH :]
  153. out_ends = out.find(end_cookie[::-1])
  154. if out_ends > -1:
  155. if post_cookie := out[out_ends + COOKIE_LENGTH :]:
  156. sys.stdout.write(post_cookie)
  157. out = out[:out_ends]
  158. return out, raw_out, out_starts, out_ends
  159. def _run_subprocess(
  160. cls: type[PythonInfo],
  161. exe: str,
  162. env: Mapping[str, str],
  163. ) -> tuple[Exception | None, PythonInfo | None]:
  164. start_cookie = gen_cookie()
  165. end_cookie = gen_cookie()
  166. timeout = float(env.get("PY_DISCOVERY_TIMEOUT", "15"))
  167. with _resolve_py_info_script() as py_info_script:
  168. cmd = [exe, str(py_info_script), start_cookie, end_cookie]
  169. env = dict(env)
  170. env.pop("__PYVENV_LAUNCHER__", None)
  171. env["PYTHONUTF8"] = "1"
  172. _LOGGER.debug("get interpreter info via cmd: %s", LogCmd(cmd))
  173. try:
  174. process = Popen( # noqa: S603
  175. cmd,
  176. universal_newlines=True,
  177. stdin=subprocess.PIPE,
  178. stderr=subprocess.PIPE,
  179. stdout=subprocess.PIPE,
  180. env=env,
  181. encoding="utf-8",
  182. errors="backslashreplace",
  183. )
  184. out, err = process.communicate(timeout=timeout)
  185. code = process.returncode
  186. except TimeoutExpired:
  187. process.kill()
  188. process.communicate()
  189. out, err, code = "", "timed out", -1
  190. except OSError as os_error:
  191. out, err, code = "", os_error.strerror, os_error.errno
  192. if code != 0:
  193. msg = f"{exe} with code {code}{f' out: {out!r}' if out else ''}{f' err: {err!r}' if err else ''}"
  194. return RuntimeError(f"failed to query {msg}"), None
  195. out, raw_out, out_starts, out_ends = _extract_between_cookies(out, start_cookie, end_cookie)
  196. try:
  197. result = cls.from_json(out)
  198. result.executable = exe
  199. except json.JSONDecodeError as exc:
  200. _LOGGER.warning(
  201. "subprocess %s returned invalid JSON; raw stdout %d chars, start cookie %s, end cookie %s, "
  202. "parsed output %d chars: %r",
  203. exe,
  204. len(raw_out),
  205. "found" if out_starts > -1 else "missing",
  206. "found" if out_ends > -1 else "missing",
  207. len(out),
  208. out[:200] if out else "<empty>",
  209. )
  210. msg = f"{exe} returned invalid JSON (exit code {code}){f', stderr: {err!r}' if err else ''}"
  211. failure = RuntimeError(msg)
  212. failure.__cause__ = exc
  213. return failure, None
  214. return None, result
  215. class LogCmd:
  216. def __init__(self, cmd: list[str], env: Mapping[str, str] | None = None) -> None:
  217. self.cmd = cmd
  218. self.env = env
  219. def __repr__(self) -> str:
  220. cmd_repr = " ".join(quote(str(c)) for c in self.cmd)
  221. if self.env is not None:
  222. cmd_repr = f"{cmd_repr} env of {self.env!r}"
  223. return cmd_repr
  224. def clear(cache: PyInfoCache) -> None:
  225. cache.py_info_clear()
  226. _CACHE.clear()
# Names exported by ``from <module> import *``.
__all__ = [
    "LogCmd",
    "clear",
    "from_exe",
]