build_py.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. from __future__ import annotations
  2. import fnmatch
  3. import itertools
  4. import operator
  5. import os
  6. import stat
  7. import textwrap
  8. from collections.abc import Iterable, Iterator
  9. from functools import partial
  10. from glob import glob
  11. from pathlib import Path
  12. from typing import Any
  13. from more_itertools import unique_everseen
  14. from .._path import StrPath, StrPathT
  15. from ..dist import Distribution
  16. from ..warnings import SetuptoolsDeprecationWarning
  17. import distutils.command.build_py as orig
  18. import distutils.errors
  19. from distutils.util import convert_path
# Glob patterns that ``build_py.find_data_files`` passes as ``extra_patterns``,
# so they are searched for in every package in addition to whatever the
# ``package_data`` configuration lists (type stub files and the ``py.typed``
# marker).
_IMPLICIT_DATA_FILES = ('*.pyi', 'py.typed')
  21. def make_writable(target) -> None:
  22. os.chmod(target, os.stat(target).st_mode | stat.S_IWRITE)
  23. class build_py(orig.build_py):
  24. """Enhanced 'build_py' command that includes data files with packages
  25. The data files are specified via a 'package_data' argument to 'setup()'.
  26. See 'setuptools.dist.Distribution' for more details.
  27. Also, this version of the 'build_py' command allows you to specify both
  28. 'py_modules' and 'packages' in the same setup operation.
  29. """
  30. distribution: Distribution # override distutils.dist.Distribution with setuptools.dist.Distribution
  31. editable_mode: bool = False
  32. existing_egg_info_dir: StrPath | None = None #: Private API, internal use only.
  33. def finalize_options(self) -> None:
  34. orig.build_py.finalize_options(self)
  35. self.package_data = self.distribution.package_data
  36. self.exclude_package_data = self.distribution.exclude_package_data or {}
  37. if 'data_files' in self.__dict__:
  38. del self.__dict__['data_files']
  39. def copy_file( # type: ignore[override] # No overload, no bytes support
  40. self,
  41. infile: StrPath,
  42. outfile: StrPathT,
  43. preserve_mode: bool = True,
  44. preserve_times: bool = True,
  45. link: str | None = None,
  46. level: object = 1,
  47. ) -> tuple[StrPathT | str, bool]:
  48. # Overwrite base class to allow using links
  49. if link:
  50. infile = str(Path(infile).resolve())
  51. outfile = str(Path(outfile).resolve()) # type: ignore[assignment] # Re-assigning a str when outfile is StrPath is ok
  52. return super().copy_file( # pyright: ignore[reportReturnType] # pypa/distutils#309
  53. infile, outfile, preserve_mode, preserve_times, link, level
  54. )
  55. def run(self) -> None:
  56. """Build modules, packages, and copy data files to build directory"""
  57. if not (self.py_modules or self.packages) or self.editable_mode:
  58. return
  59. if self.py_modules:
  60. self.build_modules()
  61. if self.packages:
  62. self.build_packages()
  63. self.build_package_data()
  64. # Only compile actual .py files, using our base class' idea of what our
  65. # output files are.
  66. self.byte_compile(orig.build_py.get_outputs(self, include_bytecode=False))
  67. # Should return "list[tuple[str, str, str, list[str]]] | Any" but can't do without typed distutils on Python 3.12+
  68. def __getattr__(self, attr: str) -> Any:
  69. "lazily compute data files"
  70. if attr == 'data_files':
  71. self.data_files = self._get_data_files()
  72. return self.data_files
  73. return orig.build_py.__getattr__(self, attr)
  74. def _get_data_files(self):
  75. """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
  76. self.analyze_manifest()
  77. return list(map(self._get_pkg_data_files, self.packages or ()))
  78. def get_data_files_without_manifest(self) -> list[tuple[str, str, str, list[str]]]:
  79. """
  80. Generate list of ``(package,src_dir,build_dir,filenames)`` tuples,
  81. but without triggering any attempt to analyze or build the manifest.
  82. """
  83. # Prevent eventual errors from unset `manifest_files`
  84. # (that would otherwise be set by `analyze_manifest`)
  85. self.__dict__.setdefault('manifest_files', {})
  86. return list(map(self._get_pkg_data_files, self.packages or ()))
  87. def _get_pkg_data_files(self, package: str) -> tuple[str, str, str, list[str]]:
  88. # Locate package source directory
  89. src_dir = self.get_package_dir(package)
  90. # Compute package build directory
  91. build_dir = os.path.join(*([self.build_lib] + package.split('.')))
  92. # Strip directory from globbed filenames
  93. filenames = [
  94. os.path.relpath(file, src_dir)
  95. for file in self.find_data_files(package, src_dir)
  96. ]
  97. return package, src_dir, build_dir, filenames
  98. def find_data_files(self, package, src_dir):
  99. """Return filenames for package's data files in 'src_dir'"""
  100. patterns = self._get_platform_patterns(
  101. self.package_data,
  102. package,
  103. src_dir,
  104. extra_patterns=_IMPLICIT_DATA_FILES,
  105. )
  106. globs_expanded = map(partial(glob, recursive=True), patterns)
  107. # flatten the expanded globs into an iterable of matches
  108. globs_matches = itertools.chain.from_iterable(globs_expanded)
  109. glob_files = filter(os.path.isfile, globs_matches)
  110. files = itertools.chain(
  111. self.manifest_files.get(package, []),
  112. glob_files,
  113. )
  114. return self.exclude_data_files(package, src_dir, files)
  115. def get_outputs(self, include_bytecode: bool = True) -> list[str]: # type: ignore[override] # Using a real boolean instead of 0|1
  116. """See :class:`setuptools.commands.build.SubCommand`"""
  117. if self.editable_mode:
  118. return list(self.get_output_mapping().keys())
  119. return super().get_outputs(include_bytecode)
  120. def get_output_mapping(self) -> dict[str, str]:
  121. """See :class:`setuptools.commands.build.SubCommand`"""
  122. mapping = itertools.chain(
  123. self._get_package_data_output_mapping(),
  124. self._get_module_mapping(),
  125. )
  126. return dict(sorted(mapping, key=operator.itemgetter(0)))
  127. def _get_module_mapping(self) -> Iterator[tuple[str, str]]:
  128. """Iterate over all modules producing (dest, src) pairs."""
  129. for package, module, module_file in self.find_all_modules():
  130. package = package.split('.')
  131. filename = self.get_module_outfile(self.build_lib, package, module)
  132. yield (filename, module_file)
  133. def _get_package_data_output_mapping(self) -> Iterator[tuple[str, str]]:
  134. """Iterate over package data producing (dest, src) pairs."""
  135. for package, src_dir, build_dir, filenames in self.data_files:
  136. for filename in filenames:
  137. target = os.path.join(build_dir, filename)
  138. srcfile = os.path.join(src_dir, filename)
  139. yield (target, srcfile)
  140. def build_package_data(self) -> None:
  141. """Copy data files into build directory"""
  142. for target, srcfile in self._get_package_data_output_mapping():
  143. self.mkpath(os.path.dirname(target))
  144. _outf, _copied = self.copy_file(srcfile, target)
  145. make_writable(target)
  146. def analyze_manifest(self) -> None:
  147. self.manifest_files: dict[str, list[str]] = {}
  148. if not self.distribution.include_package_data:
  149. return
  150. src_dirs: dict[str, str] = {}
  151. for package in self.packages or ():
  152. # Locate package source directory
  153. src_dirs[assert_relative(self.get_package_dir(package))] = package
  154. if (
  155. self.existing_egg_info_dir
  156. and Path(self.existing_egg_info_dir, "SOURCES.txt").exists()
  157. ):
  158. egg_info_dir = self.existing_egg_info_dir
  159. manifest = Path(egg_info_dir, "SOURCES.txt")
  160. files = manifest.read_text(encoding="utf-8").splitlines()
  161. else:
  162. self.run_command('egg_info')
  163. ei_cmd = self.get_finalized_command('egg_info')
  164. egg_info_dir = ei_cmd.egg_info
  165. files = ei_cmd.filelist.files
  166. check = _IncludePackageDataAbuse()
  167. for path in self._filter_build_files(files, egg_info_dir):
  168. d, f = os.path.split(assert_relative(path))
  169. prev = None
  170. oldf = f
  171. while d and d != prev and d not in src_dirs:
  172. prev = d
  173. d, df = os.path.split(d)
  174. f = os.path.join(df, f)
  175. if d in src_dirs:
  176. if f == oldf:
  177. if check.is_module(f):
  178. continue # it's a module, not data
  179. else:
  180. importable = check.importable_subpackage(src_dirs[d], f)
  181. if importable:
  182. check.warn(importable)
  183. self.manifest_files.setdefault(src_dirs[d], []).append(path)
  184. def _filter_build_files(
  185. self, files: Iterable[str], egg_info: StrPath
  186. ) -> Iterator[str]:
  187. """
  188. ``build_meta`` may try to create egg_info outside of the project directory,
  189. and this can be problematic for certain plugins (reported in issue #3500).
  190. Extensions might also include between their sources files created on the
  191. ``build_lib`` and ``build_temp`` directories.
  192. This function should filter this case of invalid files out.
  193. """
  194. build = self.get_finalized_command("build")
  195. build_dirs = (egg_info, self.build_lib, build.build_temp, build.build_base)
  196. norm_dirs = [os.path.normpath(p) for p in build_dirs if p]
  197. for file in files:
  198. norm_path = os.path.normpath(file)
  199. if not os.path.isabs(file) or all(d not in norm_path for d in norm_dirs):
  200. yield file
  201. def get_data_files(self) -> None:
  202. pass # Lazily compute data files in _get_data_files() function.
  203. def check_package(self, package, package_dir):
  204. """Check namespace packages' __init__ for declare_namespace"""
  205. try:
  206. return self.packages_checked[package]
  207. except KeyError:
  208. pass
  209. init_py = orig.build_py.check_package(self, package, package_dir)
  210. self.packages_checked[package] = init_py
  211. if not init_py or not self.distribution.namespace_packages:
  212. return init_py
  213. for pkg in self.distribution.namespace_packages:
  214. if pkg == package or pkg.startswith(package + '.'):
  215. break
  216. else:
  217. return init_py
  218. with open(init_py, 'rb') as f:
  219. contents = f.read()
  220. if b'declare_namespace' not in contents:
  221. raise distutils.errors.DistutilsError(
  222. f"Namespace package problem: {package} is a namespace package, but "
  223. "its\n__init__.py does not call declare_namespace()! Please "
  224. 'fix it.\n(See the setuptools manual under '
  225. '"Namespace Packages" for details.)\n"'
  226. )
  227. return init_py
  228. def initialize_options(self):
  229. self.packages_checked = {}
  230. orig.build_py.initialize_options(self)
  231. self.editable_mode = False
  232. self.existing_egg_info_dir = None
  233. def get_package_dir(self, package: str) -> str:
  234. res = orig.build_py.get_package_dir(self, package)
  235. if self.distribution.src_root is not None:
  236. return os.path.join(self.distribution.src_root, res)
  237. return res
  238. def exclude_data_files(self, package, src_dir, files):
  239. """Filter filenames for package's data files in 'src_dir'"""
  240. files = list(files)
  241. patterns = self._get_platform_patterns(
  242. self.exclude_package_data,
  243. package,
  244. src_dir,
  245. )
  246. match_groups = (fnmatch.filter(files, pattern) for pattern in patterns)
  247. # flatten the groups of matches into an iterable of matches
  248. matches = itertools.chain.from_iterable(match_groups)
  249. bad = set(matches)
  250. keepers = (fn for fn in files if fn not in bad)
  251. # ditch dupes
  252. return list(unique_everseen(keepers))
  253. @staticmethod
  254. def _get_platform_patterns(spec, package, src_dir, extra_patterns=()):
  255. """
  256. yield platform-specific path patterns (suitable for glob
  257. or fn_match) from a glob-based spec (such as
  258. self.package_data or self.exclude_package_data)
  259. matching package in src_dir.
  260. """
  261. raw_patterns = itertools.chain(
  262. extra_patterns,
  263. spec.get('', []),
  264. spec.get(package, []),
  265. )
  266. return (
  267. # Each pattern has to be converted to a platform-specific path
  268. os.path.join(src_dir, convert_path(pattern))
  269. for pattern in raw_patterns
  270. )
  271. def assert_relative(path):
  272. if not os.path.isabs(path):
  273. return path
  274. from distutils.errors import DistutilsSetupError
  275. msg = (
  276. textwrap.dedent(
  277. """
  278. Error: setup script specifies an absolute path:
  279. %s
  280. setup() arguments must *always* be /-separated paths relative to the
  281. setup.py directory, *never* absolute paths.
  282. """
  283. ).lstrip()
  284. % path
  285. )
  286. raise DistutilsSetupError(msg)
  287. class _IncludePackageDataAbuse:
  288. """Inform users that package or module is included as 'data file'"""
  289. class _Warning(SetuptoolsDeprecationWarning):
  290. _SUMMARY = """
  291. Package {importable!r} is absent from the `packages` configuration.
  292. """
  293. _DETAILS = """
  294. ############################
  295. # Package would be ignored #
  296. ############################
  297. Python recognizes {importable!r} as an importable package[^1],
  298. but it is absent from setuptools' `packages` configuration.
  299. This leads to an ambiguous overall configuration. If you want to distribute this
  300. package, please make sure that {importable!r} is explicitly added
  301. to the `packages` configuration field.
  302. Alternatively, you can also rely on setuptools' discovery methods
  303. (for example by using `find_namespace_packages(...)`/`find_namespace:`
  304. instead of `find_packages(...)`/`find:`).
  305. You can read more about "package discovery" on setuptools documentation page:
  306. - https://setuptools.pypa.io/en/latest/userguide/package_discovery.html
  307. If you don't want {importable!r} to be distributed and are
  308. already explicitly excluding {importable!r} via
  309. `find_namespace_packages(...)/find_namespace` or `find_packages(...)/find`,
  310. you can try to use `exclude_package_data`, or `include-package-data=False` in
  311. combination with a more fine grained `package-data` configuration.
  312. You can read more about "package data files" on setuptools documentation page:
  313. - https://setuptools.pypa.io/en/latest/userguide/datafiles.html
  314. [^1]: For Python, any directory (with suitable naming) can be imported,
  315. even if it does not contain any `.py` files.
  316. On the other hand, currently there is no concept of package data
  317. directory, all directories are treated like packages.
  318. """
  319. # _DUE_DATE: still not defined as this is particularly controversial.
  320. # Warning initially introduced in May 2022. See issue #3340 for discussion.
  321. def __init__(self) -> None:
  322. self._already_warned = set[str]()
  323. def is_module(self, file):
  324. return file.endswith(".py") and file[: -len(".py")].isidentifier()
  325. def importable_subpackage(self, parent, file):
  326. pkg = Path(file).parent
  327. parts = list(itertools.takewhile(str.isidentifier, pkg.parts))
  328. if parts:
  329. return ".".join([parent, *parts])
  330. return None
  331. def warn(self, importable):
  332. if importable not in self._already_warned:
  333. self._Warning.emit(importable=importable)
  334. self._already_warned.add(importable)