_normalization.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. """
  2. Helpers for normalization as expected in wheel/sdist/module file names
  3. and core metadata
  4. """
  5. import re
  6. from typing import TYPE_CHECKING
  7. import packaging
  8. # https://packaging.python.org/en/latest/specifications/core-metadata/#name
  9. _VALID_NAME = re.compile(r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE)
  10. _UNSAFE_NAME_CHARS = re.compile(r"[^A-Z0-9._-]+", re.IGNORECASE)
  11. _NON_ALPHANUMERIC = re.compile(r"[^A-Z0-9]+", re.IGNORECASE)
  12. _PEP440_FALLBACK = re.compile(
  13. r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.IGNORECASE
  14. )
  15. def safe_identifier(name: str) -> str:
  16. """Make a string safe to be used as Python identifier.
  17. >>> safe_identifier("12abc")
  18. '_12abc'
  19. >>> safe_identifier("__editable__.myns.pkg-78.9.3_local")
  20. '__editable___myns_pkg_78_9_3_local'
  21. """
  22. safe = re.sub(r'\W|^(?=\d)', '_', name)
  23. assert safe.isidentifier()
  24. return safe
  25. def safe_name(component: str) -> str:
  26. """Escape a component used as a project name according to Core Metadata.
  27. >>> safe_name("hello world")
  28. 'hello-world'
  29. >>> safe_name("hello?world")
  30. 'hello-world'
  31. >>> safe_name("hello_world")
  32. 'hello_world'
  33. """
  34. return _UNSAFE_NAME_CHARS.sub("-", component)
  35. def safe_version(version: str) -> str:
  36. """Convert an arbitrary string into a valid version string.
  37. Can still raise an ``InvalidVersion`` exception.
  38. To avoid exceptions use ``best_effort_version``.
  39. >>> safe_version("1988 12 25")
  40. '1988.12.25'
  41. >>> safe_version("v0.2.1")
  42. '0.2.1'
  43. >>> safe_version("v0.2?beta")
  44. '0.2b0'
  45. >>> safe_version("v0.2 beta")
  46. '0.2b0'
  47. >>> safe_version("ubuntu lts")
  48. Traceback (most recent call last):
  49. ...
  50. packaging.version.InvalidVersion: Invalid version: 'ubuntu.lts'
  51. """
  52. v = version.replace(' ', '.')
  53. try:
  54. return str(packaging.version.Version(v))
  55. except packaging.version.InvalidVersion:
  56. attempt = _UNSAFE_NAME_CHARS.sub("-", v)
  57. return str(packaging.version.Version(attempt))
  58. def best_effort_version(version: str) -> str:
  59. """Convert an arbitrary string into a version-like string.
  60. Fallback when ``safe_version`` is not safe enough.
  61. >>> best_effort_version("v0.2 beta")
  62. '0.2b0'
  63. >>> best_effort_version("ubuntu lts")
  64. '0.dev0+sanitized.ubuntu.lts'
  65. >>> best_effort_version("0.23ubuntu1")
  66. '0.23.dev0+sanitized.ubuntu1'
  67. >>> best_effort_version("0.23-")
  68. '0.23.dev0+sanitized'
  69. >>> best_effort_version("0.-_")
  70. '0.dev0+sanitized'
  71. >>> best_effort_version("42.+?1")
  72. '42.dev0+sanitized.1'
  73. """
  74. try:
  75. return safe_version(version)
  76. except packaging.version.InvalidVersion:
  77. v = version.replace(' ', '.')
  78. match = _PEP440_FALLBACK.search(v)
  79. if match:
  80. safe = match["safe"]
  81. rest = v[len(safe) :]
  82. else:
  83. safe = "0"
  84. rest = version
  85. safe_rest = _NON_ALPHANUMERIC.sub(".", rest).strip(".")
  86. local = f"sanitized.{safe_rest}".strip(".")
  87. return safe_version(f"{safe}.dev0+{local}")
  88. def safe_extra(extra: str) -> str:
  89. """Normalize extra name according to PEP 685
  90. >>> safe_extra("_FrIeNdLy-._.-bArD")
  91. 'friendly-bard'
  92. >>> safe_extra("FrIeNdLy-._.-bArD__._-")
  93. 'friendly-bard'
  94. """
  95. return _NON_ALPHANUMERIC.sub("-", extra).strip("-").lower()
  96. def filename_component(value: str) -> str:
  97. """Normalize each component of a filename (e.g. distribution/version part of wheel)
  98. Note: ``value`` needs to be already normalized.
  99. >>> filename_component("my-pkg")
  100. 'my_pkg'
  101. """
  102. return value.replace("-", "_").strip("_")
  103. def filename_component_broken(value: str) -> str:
  104. """
  105. Produce the incorrect filename component for compatibility.
  106. See pypa/setuptools#4167 for detailed analysis.
  107. TODO: replace this with filename_component after pip 24 is
  108. nearly-ubiquitous.
  109. >>> filename_component_broken('foo_bar-baz')
  110. 'foo-bar-baz'
  111. """
  112. return value.replace('_', '-')
  113. def safer_name(value: str) -> str:
  114. """Like ``safe_name`` but can be used as filename component for wheel"""
  115. # See bdist_wheel.safer_name
  116. return (
  117. # Per https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
  118. re
  119. .sub(r"[-_.]+", "-", safe_name(value))
  120. .lower()
  121. # Per https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
  122. .replace("-", "_")
  123. )
  124. def safer_best_effort_version(value: str) -> str:
  125. """Like ``best_effort_version`` but can be used as filename component for wheel"""
  126. # See bdist_wheel.safer_verion
  127. # TODO: Replace with only safe_version in the future (no need for best effort)
  128. return filename_component(best_effort_version(value))
  129. def _missing_canonicalize_license_expression(expression: str) -> str:
  130. """
  131. Defer import error to affect only users that actually use it
  132. https://github.com/pypa/setuptools/issues/4894
  133. >>> _missing_canonicalize_license_expression("a OR b")
  134. Traceback (most recent call last):
  135. ...
  136. ImportError: ...Cannot import `packaging.licenses`...
  137. """
  138. raise ImportError(
  139. "Cannot import `packaging.licenses`."
  140. """
  141. Setuptools>=77.0.0 requires "packaging>=24.2" to work properly.
  142. Please make sure you have a suitable version installed.
  143. """
  144. )
# Bind ``_canonicalize_license_expression`` to the implementation provided by
# ``packaging.licenses`` when it can be imported. Otherwise fall back to the
# placeholder, so the ImportError is raised only when the function is called.
try:
    from packaging.licenses import (
        canonicalize_license_expression as _canonicalize_license_expression,
    )
except ImportError:  # pragma: nocover
    # The fallback assignment is skipped while type checking.
    if not TYPE_CHECKING:
        # XXX: pyright is still upset even with # pyright: ignore[reportAssignmentType]
        _canonicalize_license_expression = _missing_canonicalize_license_expression