formats.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. """
  2. The functions in this module are used to validate schemas with the
  3. `format JSON Schema keyword
  4. <https://json-schema.org/understanding-json-schema/reference/string#format>`_.
  5. The correspondence is given by replacing the ``_`` character in the name of the
  6. function with a ``-`` to obtain the format name and vice versa.
  7. """
  8. from __future__ import annotations
  9. import keyword
  10. import logging
  11. import os
  12. import re
  13. import string
  14. import typing
  15. from itertools import chain as _chain
  16. if typing.TYPE_CHECKING:
  17. import builtins
  18. from typing_extensions import Literal
  19. _logger = logging.getLogger(__name__)
  20. # -------------------------------------------------------------------------------------
  21. # PEP 440
# Verbose-mode regular expression describing a version string: an optional
# ``v`` prefix, optional epoch, the release segment, optional pre/post/dev
# release segments and an optional ``+local`` label.
# The text after ``#`` inside the literal is regex-level commentary, which is
# only ignored when the pattern is compiled with ``re.VERBOSE``.
VERSION_PATTERN = r"""
    v?
    (?:
        (?:(?P<epoch>[0-9]+)!)? # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
        (?P<pre> # pre-release
            [-_\.]?
            (?P<pre_l>alpha|a|beta|b|preview|pre|c|rc)
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post> # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev> # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""

# Anchored, case-insensitive compilation of ``VERSION_PATTERN`` that also
# tolerates leading and trailing whitespace around the version.
VERSION_REGEX = re.compile(
    r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE
)
  55. def pep440(version: str) -> bool:
  56. """See :ref:`PyPA's version specification <pypa:version-specifiers>`
  57. (initially introduced in :pep:`440`).
  58. """
  59. return VERSION_REGEX.match(version) is not None
  60. # -------------------------------------------------------------------------------------
  61. # PEP 508
  62. PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
  63. PEP508_IDENTIFIER_REGEX = re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.IGNORECASE)
  64. def pep508_identifier(name: str) -> bool:
  65. """See :ref:`PyPA's name specification <pypa:name-format>`
  66. (initially introduced in :pep:`508#names`).
  67. """
  68. return PEP508_IDENTIFIER_REGEX.match(name) is not None
# ``pep508`` is defined in one of two ways depending on whether a copy of
# ``packaging`` can be imported:
#   * with ``packaging`` (or the copy vendored by setuptools): real validation;
#   * without it: a permissive stub that accepts every value.
try:
    try:
        from packaging import requirements as _req
    except ImportError:  # pragma: no cover
        # let's try setuptools vendored version
        from setuptools._vendor.packaging import (  # type: ignore[no-redef]
            requirements as _req,
        )

    def pep508(value: str) -> bool:
        """See :ref:`PyPA's dependency specifiers <pypa:dependency-specifiers>`
        (initially introduced in :pep:`508`).
        """
        # The requirement parser is the source of truth: a value is valid
        # exactly when it can be parsed without ``InvalidRequirement``.
        try:
            _req.Requirement(value)
        except _req.InvalidRequirement:
            return False
        return True

except ImportError:  # pragma: no cover
    # Neither import worked: warn once at import time and fall back to a
    # validator that never rejects anything.
    _logger.warning(
        "Could not find an installation of `packaging`. Requirements, dependencies and "
        "versions might not be validated. "
        "To enforce validation, please install `packaging`."
    )

    def pep508(value: str) -> bool:  # noqa: ARG001
        # Permissive fallback: ``value`` is intentionally ignored.
        return True
  94. def pep508_versionspec(value: str) -> bool:
  95. """Expression that can be used to specify/lock versions (including ranges)
  96. See ``versionspec`` in :ref:`PyPA's dependency specifiers
  97. <pypa:dependency-specifiers>` (initially introduced in :pep:`508`).
  98. """
  99. if any(c in value for c in (";", "]", "@")):
  100. # In PEP 508:
  101. # conditional markers, extras and URL specs are not included in the
  102. # versionspec
  103. return False
  104. # Let's pretend we have a dependency called `requirement` with the given
  105. # version spec, then we can reuse the pep508 function for validation:
  106. return pep508(f"requirement{value}")
  107. # -------------------------------------------------------------------------------------
  108. # PEP 517
  109. def pep517_backend_reference(value: str) -> bool:
  110. """See PyPA's specification for defining build-backend references
  111. introduced in :pep:`517#source-trees`.
  112. This is similar to an entry-point reference (e.g., ``package.module:object``).
  113. """
  114. module, _, obj = value.partition(":")
  115. identifiers = (i.strip() for i in _chain(module.split("."), obj.split(".")))
  116. return all(python_identifier(i) for i in identifiers if i)
  117. # -------------------------------------------------------------------------------------
  118. # Classifiers - PEP 301
  119. def _download_classifiers() -> str:
  120. import ssl
  121. from email.message import Message
  122. from urllib.request import urlopen
  123. url = "https://pypi.org/pypi?:action=list_classifiers"
  124. context = ssl.create_default_context()
  125. with urlopen(url, context=context) as response: # noqa: S310 (audit URLs)
  126. headers = Message()
  127. headers["content_type"] = response.getheader("content-type", "text/plain")
  128. return response.read().decode(headers.get_param("charset", "utf-8")) # type: ignore[no-any-return]
  129. class _TroveClassifier:
  130. """The ``trove_classifiers`` package is the official way of validating classifiers,
  131. however this package might not be always available.
  132. As a workaround we can still download a list from PyPI.
  133. We also don't want to be over strict about it, so simply skipping silently is an
  134. option (classifiers will be validated anyway during the upload to PyPI).
  135. """
  136. downloaded: None | Literal[False] | set[str]
  137. """
  138. None => not cached yet
  139. False => unavailable
  140. set => cached values
  141. """
  142. def __init__(self) -> None:
  143. self.downloaded = None
  144. self._skip_download = False
  145. self.__name__ = "trove_classifier" # Emulate a public function
  146. def _disable_download(self) -> None:
  147. # This is a private API. Only setuptools has the consent of using it.
  148. self._skip_download = True
  149. def __call__(self, value: str) -> bool:
  150. if self.downloaded is False or self._skip_download is True:
  151. return True
  152. if os.getenv("NO_NETWORK") or os.getenv("VALIDATE_PYPROJECT_NO_NETWORK"):
  153. self.downloaded = False
  154. msg = (
  155. "Install ``trove-classifiers`` to ensure proper validation. "
  156. "Skipping download of classifiers list from PyPI (NO_NETWORK)."
  157. )
  158. _logger.debug(msg)
  159. return True
  160. if self.downloaded is None:
  161. msg = (
  162. "Install ``trove-classifiers`` to ensure proper validation. "
  163. "Meanwhile a list of classifiers will be downloaded from PyPI."
  164. )
  165. _logger.debug(msg)
  166. try:
  167. self.downloaded = set(_download_classifiers().splitlines())
  168. except Exception: # noqa: BLE001
  169. self.downloaded = False
  170. _logger.debug("Problem with download, skipping validation")
  171. return True
  172. return value in self.downloaded or value.lower().startswith("private ::")
# Prefer the ``trove_classifiers`` package when it can be imported; otherwise
# ``trove_classifier`` becomes an instance of the callable ``_TroveClassifier``.
try:
    from trove_classifiers import classifiers as _trove_classifiers

    def trove_classifier(value: str) -> bool:
        """See https://pypi.org/classifiers/"""
        # Besides the known classifiers, anything starting with "Private ::"
        # (compared case-insensitively) is accepted.
        return value in _trove_classifiers or value.lower().startswith("private ::")

except ImportError:  # pragma: no cover
    trove_classifier = _TroveClassifier()
  180. # -------------------------------------------------------------------------------------
  181. # Stub packages - PEP 561
  182. def pep561_stub_name(value: str) -> bool:
  183. """Name of a directory containing type stubs.
  184. It must follow the name scheme ``<package>-stubs`` as defined in
  185. :pep:`561#stub-only-packages`.
  186. """
  187. top, *children = value.split(".")
  188. if not top.endswith("-stubs"):
  189. return False
  190. return python_module_name(".".join([top[: -len("-stubs")], *children]))
  191. # -------------------------------------------------------------------------------------
  192. # Non-PEP related
  193. def url(value: str) -> bool:
  194. """Valid URL (validation uses :obj:`urllib.parse`).
  195. For maximum compatibility please make sure to include a ``scheme`` prefix
  196. in your URL (e.g. ``http://``).
  197. """
  198. from urllib.parse import urlparse
  199. try:
  200. parts = urlparse(value)
  201. if not parts.scheme:
  202. _logger.warning(
  203. "For maximum compatibility please make sure to include a "
  204. "`scheme` prefix in your URL (e.g. 'http://'). "
  205. f"Given value: {value}"
  206. )
  207. if not (value.startswith(("/", "\\")) or "@" in value):
  208. parts = urlparse(f"http://{value}")
  209. return bool(parts.scheme and parts.netloc)
  210. except Exception: # noqa: BLE001
  211. return False
  212. # https://packaging.python.org/specifications/entry-points/
# Entry-point name: the first character may not be ``[``, whitespace or ``=``;
# the remainder may not contain ``=`` and may not end with whitespace or ``=``.
ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
ENTRYPOINT_REGEX = re.compile(f"^{ENTRYPOINT_PATTERN}$", re.IGNORECASE)
# Stricter form: only word characters, dots and dashes.
# NOTE(review): "RECOMMEDED" looks like a misspelling of "RECOMMENDED". These
# are module-level names that other code may reference, so they are kept as is.
RECOMMEDED_ENTRYPOINT_PATTERN = r"[\w.-]+"
RECOMMEDED_ENTRYPOINT_REGEX = re.compile(
    f"^{RECOMMEDED_ENTRYPOINT_PATTERN}$", re.IGNORECASE
)
# Entry-point group: one or more ``\w+`` segments separated by single dots.
ENTRYPOINT_GROUP_PATTERN = r"\w+(\.\w+)*"
ENTRYPOINT_GROUP_REGEX = re.compile(f"^{ENTRYPOINT_GROUP_PATTERN}$", re.IGNORECASE)
  221. def python_identifier(value: str) -> bool:
  222. """Can be used as identifier in Python.
  223. (Validation uses :obj:`str.isidentifier`).
  224. """
  225. return value.isidentifier()
  226. def python_qualified_identifier(value: str) -> bool:
  227. """
  228. Python "dotted identifier", i.e. a sequence of :obj:`python_identifier`
  229. concatenated with ``"."`` (e.g.: ``package.module.submodule``).
  230. """
  231. if value.startswith(".") or value.endswith("."):
  232. return False
  233. return all(python_identifier(m) for m in value.split("."))
  234. def python_module_name(value: str) -> bool:
  235. """Module name that can be used in an ``import``-statement in Python.
  236. See :obj:`python_qualified_identifier`.
  237. """
  238. return python_qualified_identifier(value)
  239. def python_module_name_relaxed(value: str) -> bool:
  240. """Similar to :obj:`python_module_name`, but relaxed to also accept
  241. dash characters (``-``) and cover special cases like ``pip-run``.
  242. It is recommended, however, that beginners avoid dash characters,
  243. as they require advanced knowledge about Python internals.
  244. The following are disallowed:
  245. * names starting/ending in dashes,
  246. * names ending in ``-stubs`` (potentially collide with :obj:`pep561_stub_name`).
  247. """
  248. if value.startswith("-") or value.endswith("-"):
  249. return False
  250. if value.endswith("-stubs"):
  251. return False # Avoid collision with PEP 561
  252. return python_module_name(value.replace("-", "_"))
  253. def python_entrypoint_group(value: str) -> bool:
  254. """See ``Data model > group`` in the :ref:`PyPA's entry-points specification
  255. <pypa:entry-points>`.
  256. """
  257. return ENTRYPOINT_GROUP_REGEX.match(value) is not None
  258. def python_entrypoint_name(value: str) -> bool:
  259. """See ``Data model > name`` in the :ref:`PyPA's entry-points specification
  260. <pypa:entry-points>`.
  261. """
  262. if not ENTRYPOINT_REGEX.match(value):
  263. return False
  264. if not RECOMMEDED_ENTRYPOINT_REGEX.match(value):
  265. msg = f"Entry point `{value}` does not follow recommended pattern: "
  266. msg += RECOMMEDED_ENTRYPOINT_PATTERN
  267. _logger.warning(msg)
  268. return True
  269. def python_entrypoint_reference(value: str) -> bool:
  270. """Reference to a Python object using in the format::
  271. importable.module:object.attr
  272. See ``Data model >object reference`` in the :ref:`PyPA's entry-points specification
  273. <pypa:entry-points>`.
  274. """
  275. module, _, rest = value.partition(":")
  276. if "[" in rest:
  277. obj, _, extras_ = rest.partition("[")
  278. if extras_.strip()[-1] != "]":
  279. return False
  280. extras = (x.strip() for x in extras_.strip(string.whitespace + "[]").split(","))
  281. if not all(pep508_identifier(e) for e in extras):
  282. return False
  283. _logger.warning(f"`{value}` - using extras for entry points is not recommended")
  284. else:
  285. obj = rest
  286. module_parts = module.split(".")
  287. identifiers = _chain(module_parts, obj.split(".")) if rest else iter(module_parts)
  288. return all(python_identifier(i.strip()) for i in identifiers)
  289. def uint8(value: builtins.int) -> bool:
  290. r"""Unsigned 8-bit integer (:math:`0 \leq x < 2^8`)"""
  291. return 0 <= value < 2**8
  292. def uint16(value: builtins.int) -> bool:
  293. r"""Unsigned 16-bit integer (:math:`0 \leq x < 2^{16}`)"""
  294. return 0 <= value < 2**16
  295. def uint32(value: builtins.int) -> bool:
  296. r"""Unsigned 32-bit integer (:math:`0 \leq x < 2^{32}`)"""
  297. return 0 <= value < 2**32
  298. def uint64(value: builtins.int) -> bool:
  299. r"""Unsigned 64-bit integer (:math:`0 \leq x < 2^{64}`)"""
  300. return 0 <= value < 2**64
  301. def uint(value: builtins.int) -> bool:
  302. r"""Signed 64-bit integer (:math:`0 \leq x < 2^{64}`)"""
  303. return 0 <= value < 2**64
  304. def int8(value: builtins.int) -> bool:
  305. r"""Signed 8-bit integer (:math:`-2^{7} \leq x < 2^{7}`)"""
  306. return -(2**7) <= value < 2**7
  307. def int16(value: builtins.int) -> bool:
  308. r"""Signed 16-bit integer (:math:`-2^{15} \leq x < 2^{15}`)"""
  309. return -(2**15) <= value < 2**15
  310. def int32(value: builtins.int) -> bool:
  311. r"""Signed 32-bit integer (:math:`-2^{31} \leq x < 2^{31}`)"""
  312. return -(2**31) <= value < 2**31
  313. def int64(value: builtins.int) -> bool:
  314. r"""Signed 64-bit integer (:math:`-2^{63} \leq x < 2^{63}`)"""
  315. return -(2**63) <= value < 2**63
  316. def int(value: builtins.int) -> bool:
  317. r"""Signed 64-bit integer (:math:`-2^{63} \leq x < 2^{63}`)"""
  318. return -(2**63) <= value < 2**63
# ``SPDX`` performs real validation only when ``packaging.licenses`` can be
# imported; otherwise a permissive stub that accepts every value is defined.
try:
    from packaging import licenses as _licenses

    def SPDX(value: str) -> bool:
        """See :ref:`PyPA's License-Expression specification
        <pypa:core-metadata-license-expression>` (added in :pep:`639`).
        """
        # The canonicalized expression itself is discarded: only whether the
        # call raises ``InvalidLicenseExpression`` matters here.
        try:
            _licenses.canonicalize_license_expression(value)
        except _licenses.InvalidLicenseExpression:
            return False
        return True

except ImportError:  # pragma: no cover
    # Warn once at import time, then accept everything.
    _logger.warning(
        "Could not find an up-to-date installation of `packaging`. "
        "License expressions might not be validated. "
        "To enforce validation, please install `packaging>=24.2`."
    )

    def SPDX(value: str) -> bool:  # noqa: ARG001
        # Permissive fallback: ``value`` is intentionally ignored.
        return True
  338. VALID_IMPORT_NAME = re.compile(
  339. r"""
  340. ^ # start of string
  341. [A-Za-z_][A-Za-z_0-9]+ # a valid Python identifier
  342. (?:\.[A-Za-z_][A-Za-z_0-9]*)* # optionally followed by .identifier's
  343. (?:\s*;\s*private)? # optionally followed by ; private
  344. $ # end of string
  345. """,
  346. re.VERBOSE,
  347. )
  348. def import_name(value: str) -> bool:
  349. """This is a valid import name. It has to be series of python identifiers
  350. (not keywords), separated by dots, optionally followed by a semicolon and
  351. the keyword "private".
  352. """
  353. if VALID_IMPORT_NAME.match(value) is None:
  354. return False
  355. idents, _, _ = value.partition(";")
  356. return all(not keyword.iskeyword(ident) for ident in idents.rstrip().split("."))