util.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. """distutils.util
  2. Miscellaneous utility functions -- anything that doesn't fit into
  3. one of the other *util.py modules.
  4. """
  5. from __future__ import annotations
  6. import functools
  7. import importlib.util
  8. import os
  9. import pathlib
  10. import re
  11. import string
  12. import subprocess
  13. import sys
  14. import sysconfig
  15. import tempfile
  16. from collections.abc import Callable, Iterable, Mapping
  17. from typing import TYPE_CHECKING, AnyStr
  18. from jaraco.functools import pass_none
  19. from ._log import log
  20. from ._modified import newer
  21. from .errors import DistutilsByteCompileError, DistutilsPlatformError
  22. from .spawn import spawn
  23. if TYPE_CHECKING:
  24. from typing_extensions import TypeVarTuple, Unpack
  25. _Ts = TypeVarTuple("_Ts")
  26. def get_host_platform() -> str:
  27. """
  28. Return a string that identifies the current platform. Use this
  29. function to distinguish platform-specific build directories and
  30. platform-specific built distributions.
  31. """
  32. # This function initially exposed platforms as defined in Python 3.9
  33. # even with older Python versions when distutils was split out.
  34. # Now it delegates to stdlib sysconfig.
  35. return sysconfig.get_platform()
  36. def get_platform() -> str:
  37. if os.name == 'nt':
  38. TARGET_TO_PLAT = {
  39. 'x86': 'win32',
  40. 'x64': 'win-amd64',
  41. 'arm': 'win-arm32',
  42. 'arm64': 'win-arm64',
  43. }
  44. target = os.environ.get('VSCMD_ARG_TGT_ARCH')
  45. return TARGET_TO_PLAT.get(target) or get_host_platform()
  46. return get_host_platform()
  47. if sys.platform == 'darwin':
  48. _syscfg_macosx_ver = None # cache the version pulled from sysconfig
  49. MACOSX_VERSION_VAR = 'MACOSX_DEPLOYMENT_TARGET'
  50. def _clear_cached_macosx_ver():
  51. """For testing only. Do not call."""
  52. global _syscfg_macosx_ver
  53. _syscfg_macosx_ver = None
  54. def get_macosx_target_ver_from_syscfg():
  55. """Get the version of macOS latched in the Python interpreter configuration.
  56. Returns the version as a string or None if can't obtain one. Cached."""
  57. global _syscfg_macosx_ver
  58. if _syscfg_macosx_ver is None:
  59. from distutils import sysconfig
  60. ver = sysconfig.get_config_var(MACOSX_VERSION_VAR) or ''
  61. if ver:
  62. _syscfg_macosx_ver = ver
  63. return _syscfg_macosx_ver
  64. def get_macosx_target_ver():
  65. """Return the version of macOS for which we are building.
  66. The target version defaults to the version in sysconfig latched at time
  67. the Python interpreter was built, unless overridden by an environment
  68. variable. If neither source has a value, then None is returned"""
  69. syscfg_ver = get_macosx_target_ver_from_syscfg()
  70. env_ver = os.environ.get(MACOSX_VERSION_VAR)
  71. if env_ver:
  72. # Validate overridden version against sysconfig version, if have both.
  73. # Ensure that the deployment target of the build process is not less
  74. # than 10.3 if the interpreter was built for 10.3 or later. This
  75. # ensures extension modules are built with correct compatibility
  76. # values, specifically LDSHARED which can use
  77. # '-undefined dynamic_lookup' which only works on >= 10.3.
  78. if (
  79. syscfg_ver
  80. and split_version(syscfg_ver) >= [10, 3]
  81. and split_version(env_ver) < [10, 3]
  82. ):
  83. my_msg = (
  84. '$' + MACOSX_VERSION_VAR + ' mismatch: '
  85. f'now "{env_ver}" but "{syscfg_ver}" during configure; '
  86. 'must use 10.3 or later'
  87. )
  88. raise DistutilsPlatformError(my_msg)
  89. return env_ver
  90. return syscfg_ver
  91. def split_version(s: str) -> list[int]:
  92. """Convert a dot-separated string into a list of numbers for comparisons"""
  93. return [int(n) for n in s.split('.')]
  94. @pass_none
  95. def convert_path(pathname: str | os.PathLike[str]) -> str:
  96. r"""
  97. Allow for pathlib.Path inputs, coax to a native path string.
  98. If None is passed, will just pass it through as
  99. Setuptools relies on this behavior.
  100. >>> convert_path(None) is None
  101. True
  102. Removes empty paths.
  103. >>> convert_path('foo/./bar').replace('\\', '/')
  104. 'foo/bar'
  105. """
  106. return os.fspath(pathlib.PurePath(pathname))
  107. def change_root(
  108. new_root: AnyStr | os.PathLike[AnyStr], pathname: AnyStr | os.PathLike[AnyStr]
  109. ) -> AnyStr:
  110. """Return 'pathname' with 'new_root' prepended. If 'pathname' is
  111. relative, this is equivalent to "os.path.join(new_root,pathname)".
  112. Otherwise, it requires making 'pathname' relative and then joining the
  113. two, which is tricky on DOS/Windows and Mac OS.
  114. """
  115. if os.name == 'posix':
  116. if not os.path.isabs(pathname):
  117. return os.path.join(new_root, pathname)
  118. else:
  119. return os.path.join(new_root, pathname[1:])
  120. elif os.name == 'nt':
  121. (drive, path) = os.path.splitdrive(pathname)
  122. if path[0] == os.sep:
  123. path = path[1:]
  124. return os.path.join(new_root, path)
  125. raise DistutilsPlatformError(f"nothing known about platform '{os.name}'")
  126. @functools.lru_cache
  127. def check_environ() -> None:
  128. """Ensure that 'os.environ' has all the environment variables we
  129. guarantee that users can use in config files, command-line options,
  130. etc. Currently this includes:
  131. HOME - user's home directory (Unix only)
  132. PLAT - description of the current platform, including hardware
  133. and OS (see 'get_platform()')
  134. """
  135. if os.name == 'posix' and 'HOME' not in os.environ:
  136. try:
  137. import pwd
  138. os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
  139. except (ImportError, KeyError):
  140. # bpo-10496: if the current user identifier doesn't exist in the
  141. # password database, do nothing
  142. pass
  143. if 'PLAT' not in os.environ:
  144. os.environ['PLAT'] = get_platform()
  145. def subst_vars(s, local_vars: Mapping[str, object]) -> str:
  146. """
  147. Perform variable substitution on 'string'.
  148. Variables are indicated by format-style braces ("{var}").
  149. Variable is substituted by the value found in the 'local_vars'
  150. dictionary or in 'os.environ' if it's not in 'local_vars'.
  151. 'os.environ' is first checked/augmented to guarantee that it contains
  152. certain values: see 'check_environ()'. Raise ValueError for any
  153. variables not found in either 'local_vars' or 'os.environ'.
  154. """
  155. check_environ()
  156. lookup = dict(os.environ)
  157. lookup.update((name, str(value)) for name, value in local_vars.items())
  158. try:
  159. return _subst_compat(s).format_map(lookup)
  160. except KeyError as var:
  161. raise ValueError(f"invalid variable {var}")
  162. def _subst_compat(s):
  163. """
  164. Replace shell/Perl-style variable substitution with
  165. format-style. For compatibility.
  166. """
  167. def _subst(match):
  168. return f'{{{match.group(1)}}}'
  169. repl = re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
  170. if repl != s:
  171. import warnings
  172. warnings.warn(
  173. "shell/Perl-style substitutions are deprecated",
  174. DeprecationWarning,
  175. )
  176. return repl
  177. def grok_environment_error(exc: object, prefix: str = "error: ") -> str:
  178. # Function kept for backward compatibility.
  179. # Used to try clever things with EnvironmentErrors,
  180. # but nowadays str(exception) produces good messages.
  181. return prefix + str(exc)
  182. # Needed by 'split_quoted()'
  183. _wordchars_re = _squote_re = _dquote_re = None
  184. def _init_regex():
  185. global _wordchars_re, _squote_re, _dquote_re
  186. _wordchars_re = re.compile(rf'[^\\\'\"{string.whitespace} ]*')
  187. _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
  188. _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
  189. def split_quoted(s: str) -> list[str]:
  190. """Split a string up according to Unix shell-like rules for quotes and
  191. backslashes. In short: words are delimited by spaces, as long as those
  192. spaces are not escaped by a backslash, or inside a quoted string.
  193. Single and double quotes are equivalent, and the quote characters can
  194. be backslash-escaped. The backslash is stripped from any two-character
  195. escape sequence, leaving only the escaped character. The quote
  196. characters are stripped from any quoted string. Returns a list of
  197. words.
  198. """
  199. # This is a nice algorithm for splitting up a single string, since it
  200. # doesn't require character-by-character examination. It was a little
  201. # bit of a brain-bender to get it working right, though...
  202. if _wordchars_re is None:
  203. _init_regex()
  204. s = s.strip()
  205. words = []
  206. pos = 0
  207. while s:
  208. m = _wordchars_re.match(s, pos)
  209. end = m.end()
  210. if end == len(s):
  211. words.append(s[:end])
  212. break
  213. if s[end] in string.whitespace:
  214. # unescaped, unquoted whitespace: now
  215. # we definitely have a word delimiter
  216. words.append(s[:end])
  217. s = s[end:].lstrip()
  218. pos = 0
  219. elif s[end] == '\\':
  220. # preserve whatever is being escaped;
  221. # will become part of the current word
  222. s = s[:end] + s[end + 1 :]
  223. pos = end + 1
  224. else:
  225. if s[end] == "'": # slurp singly-quoted string
  226. m = _squote_re.match(s, end)
  227. elif s[end] == '"': # slurp doubly-quoted string
  228. m = _dquote_re.match(s, end)
  229. else:
  230. raise RuntimeError(f"this can't happen (bad char '{s[end]}')")
  231. if m is None:
  232. raise ValueError(f"bad string (mismatched {s[end]} quotes?)")
  233. (beg, end) = m.span()
  234. s = s[:beg] + s[beg + 1 : end - 1] + s[end:]
  235. pos = m.end() - 2
  236. if pos >= len(s):
  237. words.append(s)
  238. break
  239. return words
  240. # split_quoted ()
  241. def execute(
  242. func: Callable[[Unpack[_Ts]], object],
  243. args: tuple[Unpack[_Ts]],
  244. msg: object = None,
  245. verbose: bool = False,
  246. ) -> None:
  247. """
  248. Perform some action that affects the outside world (e.g. by
  249. writing to the filesystem). Was previously used to deal with
  250. "dry run" operations, but now runs unconditionally.
  251. """
  252. if msg is None:
  253. msg = f"{func.__name__}{args!r}"
  254. if msg[-2:] == ',)': # correct for singleton tuple
  255. msg = msg[0:-2] + ')'
  256. log.info(msg)
  257. func(*args)
  258. def strtobool(val: str) -> bool:
  259. """Convert a string representation of truth to true (1) or false (0).
  260. True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
  261. are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if
  262. 'val' is anything else.
  263. """
  264. val = val.lower()
  265. if val in ('y', 'yes', 't', 'true', 'on', '1'):
  266. return True
  267. elif val in ('n', 'no', 'f', 'false', 'off', '0'):
  268. return False
  269. else:
  270. raise ValueError(f"invalid truth value {val!r}")
  271. def byte_compile( # noqa: C901
  272. py_files: Iterable[str],
  273. optimize: int = 0,
  274. force: bool = False,
  275. prefix: str | None = None,
  276. base_dir: str | None = None,
  277. verbose: bool = True,
  278. direct: bool | None = None,
  279. ) -> None:
  280. """Byte-compile a collection of Python source files to .pyc
  281. files in a __pycache__ subdirectory. 'py_files' is a list
  282. of files to compile; any files that don't end in ".py" are silently
  283. skipped. 'optimize' must be one of the following:
  284. 0 - don't optimize
  285. 1 - normal optimization (like "python -O")
  286. 2 - extra optimization (like "python -OO")
  287. If 'force' is true, all files are recompiled regardless of
  288. timestamps.
  289. The source filename encoded in each bytecode file defaults to the
  290. filenames listed in 'py_files'; you can modify these with 'prefix' and
  291. 'basedir'. 'prefix' is a string that will be stripped off of each
  292. source filename, and 'base_dir' is a directory name that will be
  293. prepended (after 'prefix' is stripped). You can supply either or both
  294. (or neither) of 'prefix' and 'base_dir', as you wish.
  295. Byte-compilation is either done directly in this interpreter process
  296. with the standard py_compile module, or indirectly by writing a
  297. temporary script and executing it. Normally, you should let
  298. 'byte_compile()' figure out to use direct compilation or not (see
  299. the source for details). The 'direct' flag is used by the script
  300. generated in indirect mode; unless you know what you're doing, leave
  301. it set to None.
  302. """
  303. # nothing is done if sys.dont_write_bytecode is True
  304. if sys.dont_write_bytecode:
  305. raise DistutilsByteCompileError('byte-compiling is disabled.')
  306. # First, if the caller didn't force us into direct or indirect mode,
  307. # figure out which mode we should be in. We take a conservative
  308. # approach: choose direct mode *only* if the current interpreter is
  309. # in debug mode and optimize is 0. If we're not in debug mode (-O
  310. # or -OO), we don't know which level of optimization this
  311. # interpreter is running with, so we can't do direct
  312. # byte-compilation and be certain that it's the right thing. Thus,
  313. # always compile indirectly if the current interpreter is in either
  314. # optimize mode, or if either optimization level was requested by
  315. # the caller.
  316. if direct is None:
  317. direct = __debug__ and optimize == 0
  318. # "Indirect" byte-compilation: write a temporary script and then
  319. # run it with the appropriate flags.
  320. if not direct:
  321. (script_fd, script_name) = tempfile.mkstemp(".py")
  322. log.info("writing byte-compilation script '%s'", script_name)
  323. script = os.fdopen(script_fd, "w", encoding='utf-8')
  324. with script:
  325. script.write(
  326. """\
  327. from distutils.util import byte_compile
  328. files = [
  329. """
  330. )
  331. # XXX would be nice to write absolute filenames, just for
  332. # safety's sake (script should be more robust in the face of
  333. # chdir'ing before running it). But this requires abspath'ing
  334. # 'prefix' as well, and that breaks the hack in build_lib's
  335. # 'byte_compile()' method that carefully tacks on a trailing
  336. # slash (os.sep really) to make sure the prefix here is "just
  337. # right". This whole prefix business is rather delicate -- the
  338. # problem is that it's really a directory, but I'm treating it
  339. # as a dumb string, so trailing slashes and so forth matter.
  340. script.write(",\n".join(map(repr, py_files)) + "]\n")
  341. script.write(
  342. f"""
  343. byte_compile(files, optimize={optimize!r}, force={force!r},
  344. prefix={prefix!r}, base_dir={base_dir!r},
  345. verbose={verbose!r},
  346. direct=True)
  347. """
  348. )
  349. cmd = [sys.executable]
  350. cmd.extend(subprocess._optim_args_from_interpreter_flags())
  351. cmd.append(script_name)
  352. spawn(cmd)
  353. execute(os.remove, (script_name,), f"removing {script_name}")
  354. # "Direct" byte-compilation: use the py_compile module to compile
  355. # right here, right now. Note that the script generated in indirect
  356. # mode simply calls 'byte_compile()' in direct mode, a weird sort of
  357. # cross-process recursion. Hey, it works!
  358. else:
  359. from py_compile import compile
  360. for file in py_files:
  361. if file[-3:] != ".py":
  362. # This lets us be lazy and not filter filenames in
  363. # the "install_lib" command.
  364. continue
  365. # Terminology from the py_compile module:
  366. # cfile - byte-compiled file
  367. # dfile - purported source filename (same as 'file' by default)
  368. if optimize >= 0:
  369. opt = '' if optimize == 0 else optimize
  370. cfile = importlib.util.cache_from_source(file, optimization=opt)
  371. else:
  372. cfile = importlib.util.cache_from_source(file)
  373. dfile = file
  374. if prefix:
  375. if file[: len(prefix)] != prefix:
  376. raise ValueError(
  377. f"invalid prefix: filename {file!r} doesn't start with {prefix!r}"
  378. )
  379. dfile = dfile[len(prefix) :]
  380. if base_dir:
  381. dfile = os.path.join(base_dir, dfile)
  382. cfile_base = os.path.basename(cfile)
  383. if direct:
  384. if force or newer(file, cfile):
  385. log.info("byte-compiling %s to %s", file, cfile_base)
  386. compile(file, cfile, dfile)
  387. else:
  388. log.debug("skipping byte-compilation of %s to %s", file, cfile_base)
  389. def rfc822_escape(header: str) -> str:
  390. """Return a version of the string escaped for inclusion in an
  391. RFC-822 header, by ensuring there are 8 spaces space after each newline.
  392. """
  393. indent = 8 * " "
  394. lines = header.splitlines(keepends=True)
  395. # Emulate the behaviour of `str.split`
  396. # (the terminal line break in `splitlines` does not result in an extra line):
  397. ends_in_newline = lines and lines[-1].splitlines()[0] != lines[-1]
  398. suffix = indent if ends_in_newline else ""
  399. return indent.join(lines) + suffix
  400. def is_mingw() -> bool:
  401. """Returns True if the current platform is mingw.
  402. Python compiled with Mingw-w64 has sys.platform == 'win32' and
  403. get_platform() starts with 'mingw'.
  404. """
  405. return sys.platform == 'win32' and get_platform().startswith('mingw')
  406. def is_freethreaded():
  407. """Return True if the Python interpreter is built with free threading support."""
  408. return bool(sysconfig.get_config_var('Py_GIL_DISABLED'))