convert.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. from __future__ import annotations
  2. import os.path
  3. import re
  4. from abc import ABCMeta, abstractmethod
  5. from collections import defaultdict
  6. from collections.abc import Iterator
  7. from email.message import Message
  8. from email.parser import Parser
  9. from email.policy import EmailPolicy
  10. from glob import iglob
  11. from pathlib import Path
  12. from textwrap import dedent
  13. from zipfile import ZipFile
  14. from packaging.tags import parse_tag
  15. from .. import __version__
  16. from .._metadata import generate_requirements
  17. from ..wheelfile import WheelFile
  18. egg_filename_re = re.compile(
  19. r"""
  20. (?P<name>.+?)-(?P<ver>.+?)
  21. (-(?P<pyver>py\d\.\d+)
  22. (-(?P<arch>.+?))?
  23. )?.egg$""",
  24. re.VERBOSE,
  25. )
  26. egg_info_re = re.compile(
  27. r"""
  28. ^(?P<name>.+?)-(?P<ver>.+?)
  29. (-(?P<pyver>py\d\.\d+)
  30. )?.egg-info/""",
  31. re.VERBOSE,
  32. )
  33. wininst_re = re.compile(
  34. r"\.(?P<platform>win32|win-amd64)(?:-(?P<pyver>py\d\.\d))?\.exe$"
  35. )
  36. pyd_re = re.compile(r"\.(?P<abi>[a-z0-9]+)-(?P<platform>win32|win_amd64)\.pyd$")
  37. serialization_policy = EmailPolicy(
  38. utf8=True,
  39. mangle_from_=False,
  40. max_line_length=0,
  41. )
# Value written to the "Generator" header of the WHEEL file produced by convert()
GENERATOR = f"wheel {__version__}"
  43. def convert_requires(requires: str, metadata: Message) -> None:
  44. extra: str | None = None
  45. requirements: dict[str | None, list[str]] = defaultdict(list)
  46. for line in requires.splitlines():
  47. line = line.strip()
  48. if not line:
  49. continue
  50. if line.startswith("[") and line.endswith("]"):
  51. extra = line[1:-1]
  52. continue
  53. requirements[extra].append(line)
  54. for key, value in generate_requirements(requirements):
  55. metadata.add_header(key, value)
  56. def convert_pkg_info(pkginfo: str, metadata: Message) -> None:
  57. parsed_message = Parser().parsestr(pkginfo)
  58. for key, value in parsed_message.items():
  59. key_lower = key.lower()
  60. if value == "UNKNOWN":
  61. continue
  62. if key_lower == "description":
  63. description_lines = value.splitlines()
  64. if description_lines:
  65. value = "\n".join(
  66. (
  67. description_lines[0].lstrip(),
  68. dedent("\n".join(description_lines[1:])),
  69. "\n",
  70. )
  71. )
  72. else:
  73. value = "\n"
  74. metadata.set_payload(value)
  75. elif key_lower == "home-page":
  76. metadata.add_header("Project-URL", f"Homepage, {value}")
  77. elif key_lower == "download-url":
  78. metadata.add_header("Project-URL", f"Download, {value}")
  79. else:
  80. metadata.add_header(key, value)
  81. metadata.replace_header("Metadata-Version", "2.4")
  82. def normalize(name: str) -> str:
  83. return re.sub(r"[-_.]+", "-", name).lower().replace("-", "_")
  84. class ConvertSource(metaclass=ABCMeta):
  85. name: str
  86. version: str
  87. pyver: str = "py2.py3"
  88. abi: str = "none"
  89. platform: str = "any"
  90. metadata: Message
  91. @property
  92. def dist_info_dir(self) -> str:
  93. return f"{self.name}-{self.version}.dist-info"
  94. @abstractmethod
  95. def generate_contents(self) -> Iterator[tuple[str, bytes]]:
  96. pass
  97. class EggFileSource(ConvertSource):
  98. def __init__(self, path: Path):
  99. if not (match := egg_filename_re.match(path.name)):
  100. raise ValueError(f"Invalid egg file name: {path.name}")
  101. # Binary wheels are assumed to be for CPython
  102. self.path = path
  103. self.name = normalize(match.group("name"))
  104. self.version = match.group("ver")
  105. if pyver := match.group("pyver"):
  106. self.pyver = pyver.replace(".", "")
  107. if arch := match.group("arch"):
  108. self.abi = self.pyver.replace("py", "cp")
  109. self.platform = normalize(arch)
  110. self.metadata = Message()
  111. def generate_contents(self) -> Iterator[tuple[str, bytes]]:
  112. with ZipFile(self.path, "r") as zip_file:
  113. for filename in sorted(zip_file.namelist()):
  114. # Skip pure directory entries
  115. if filename.endswith("/"):
  116. continue
  117. # Handle files in the egg-info directory specially, selectively moving
  118. # them to the dist-info directory while converting as needed
  119. if filename.startswith("EGG-INFO/"):
  120. if filename == "EGG-INFO/requires.txt":
  121. requires = zip_file.read(filename).decode("utf-8")
  122. convert_requires(requires, self.metadata)
  123. elif filename == "EGG-INFO/PKG-INFO":
  124. pkginfo = zip_file.read(filename).decode("utf-8")
  125. convert_pkg_info(pkginfo, self.metadata)
  126. elif filename == "EGG-INFO/entry_points.txt":
  127. yield (
  128. f"{self.dist_info_dir}/entry_points.txt",
  129. zip_file.read(filename),
  130. )
  131. continue
  132. # For any other file, just pass it through
  133. yield filename, zip_file.read(filename)
  134. class EggDirectorySource(EggFileSource):
  135. def generate_contents(self) -> Iterator[tuple[str, bytes]]:
  136. for dirpath, _, filenames in os.walk(self.path):
  137. for filename in sorted(filenames):
  138. path = Path(dirpath, filename)
  139. if path.parent.name == "EGG-INFO":
  140. if path.name == "requires.txt":
  141. requires = path.read_text("utf-8")
  142. convert_requires(requires, self.metadata)
  143. elif path.name == "PKG-INFO":
  144. pkginfo = path.read_text("utf-8")
  145. convert_pkg_info(pkginfo, self.metadata)
  146. if name := self.metadata.get("Name"):
  147. self.name = normalize(name)
  148. if version := self.metadata.get("Version"):
  149. self.version = version
  150. elif path.name == "entry_points.txt":
  151. yield (
  152. f"{self.dist_info_dir}/entry_points.txt",
  153. path.read_bytes(),
  154. )
  155. continue
  156. # For any other file, just pass it through
  157. yield str(path.relative_to(self.path)), path.read_bytes()
  158. class WininstFileSource(ConvertSource):
  159. """
  160. Handles distributions created with ``bdist_wininst``.
  161. The egginfo filename has the format::
  162. name-ver(-pyver)(-arch).egg-info
  163. The installer filename has the format::
  164. name-ver.arch(-pyver).exe
  165. Some things to note:
  166. 1. The installer filename is not definitive. An installer can be renamed
  167. and work perfectly well as an installer. So more reliable data should
  168. be used whenever possible.
  169. 2. The egg-info data should be preferred for the name and version, because
  170. these come straight from the distutils metadata, and are mandatory.
  171. 3. The pyver from the egg-info data should be ignored, as it is
  172. constructed from the version of Python used to build the installer,
  173. which is irrelevant - the installer filename is correct here (even to
  174. the point that when it's not there, any version is implied).
  175. 4. The architecture must be taken from the installer filename, as it is
  176. not included in the egg-info data.
  177. 5. Architecture-neutral installers still have an architecture because the
  178. installer format itself (being executable) is architecture-specific. We
  179. should therefore ignore the architecture if the content is pure-python.
  180. """
  181. def __init__(self, path: Path):
  182. self.path = path
  183. self.metadata = Message()
  184. # Determine the initial architecture and Python version from the file name
  185. # (if possible)
  186. if match := wininst_re.search(path.name):
  187. self.platform = normalize(match.group("platform"))
  188. if pyver := match.group("pyver"):
  189. self.pyver = pyver.replace(".", "")
  190. # Look for an .egg-info directory and any .pyd files for more precise info
  191. egg_info_found = pyd_found = False
  192. with ZipFile(self.path) as zip_file:
  193. for filename in zip_file.namelist():
  194. prefix, filename = filename.split("/", 1)
  195. if not egg_info_found and (match := egg_info_re.match(filename)):
  196. egg_info_found = True
  197. self.name = normalize(match.group("name"))
  198. self.version = match.group("ver")
  199. if pyver := match.group("pyver"):
  200. self.pyver = pyver.replace(".", "")
  201. elif not pyd_found and (match := pyd_re.search(filename)):
  202. pyd_found = True
  203. self.abi = match.group("abi")
  204. self.platform = match.group("platform")
  205. if egg_info_found and pyd_found:
  206. break
  207. def generate_contents(self) -> Iterator[tuple[str, bytes]]:
  208. dist_info_dir = f"{self.name}-{self.version}.dist-info"
  209. data_dir = f"{self.name}-{self.version}.data"
  210. with ZipFile(self.path, "r") as zip_file:
  211. for filename in sorted(zip_file.namelist()):
  212. # Skip pure directory entries
  213. if filename.endswith("/"):
  214. continue
  215. # Handle files in the egg-info directory specially, selectively moving
  216. # them to the dist-info directory while converting as needed
  217. prefix, target_filename = filename.split("/", 1)
  218. if egg_info_re.search(target_filename):
  219. basename = target_filename.rsplit("/", 1)[-1]
  220. if basename == "requires.txt":
  221. requires = zip_file.read(filename).decode("utf-8")
  222. convert_requires(requires, self.metadata)
  223. elif basename == "PKG-INFO":
  224. pkginfo = zip_file.read(filename).decode("utf-8")
  225. convert_pkg_info(pkginfo, self.metadata)
  226. elif basename == "entry_points.txt":
  227. yield (
  228. f"{dist_info_dir}/entry_points.txt",
  229. zip_file.read(filename),
  230. )
  231. continue
  232. elif prefix == "SCRIPTS":
  233. target_filename = f"{data_dir}/scripts/{target_filename}"
  234. # For any other file, just pass it through
  235. yield target_filename, zip_file.read(filename)
  236. def convert(files: list[str], dest_dir: str, verbose: bool) -> None:
  237. for pat in files:
  238. for archive in iglob(pat):
  239. path = Path(archive)
  240. if path.suffix == ".egg":
  241. if path.is_dir():
  242. source: ConvertSource = EggDirectorySource(path)
  243. else:
  244. source = EggFileSource(path)
  245. else:
  246. source = WininstFileSource(path)
  247. if verbose:
  248. print(f"{archive}...", flush=True, end="")
  249. dest_path = Path(dest_dir) / (
  250. f"{source.name}-{source.version}-{source.pyver}-{source.abi}"
  251. f"-{source.platform}.whl"
  252. )
  253. with WheelFile(dest_path, "w") as wheelfile:
  254. for name_or_zinfo, contents in source.generate_contents():
  255. wheelfile.writestr(name_or_zinfo, contents)
  256. # Write the METADATA file
  257. wheelfile.writestr(
  258. f"{source.dist_info_dir}/METADATA",
  259. source.metadata.as_string(policy=serialization_policy).encode(
  260. "utf-8"
  261. ),
  262. )
  263. # Write the WHEEL file
  264. wheel_message = Message()
  265. wheel_message.add_header("Wheel-Version", "1.0")
  266. wheel_message.add_header("Generator", GENERATOR)
  267. wheel_message.add_header(
  268. "Root-Is-Purelib", str(source.platform == "any").lower()
  269. )
  270. tags = parse_tag(f"{source.pyver}-{source.abi}-{source.platform}")
  271. for tag in sorted(tags, key=lambda tag: tag.interpreter):
  272. wheel_message.add_header("Tag", str(tag))
  273. wheelfile.writestr(
  274. f"{source.dist_info_dir}/WHEEL",
  275. wheel_message.as_string(policy=serialization_policy).encode(
  276. "utf-8"
  277. ),
  278. )
  279. if verbose:
  280. print("OK")