__init__.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. #######################################################################################
  2. #
  3. # Adapted from:
  4. # https://github.com/pypa/hatch/blob/5352e44/backend/src/hatchling/licenses/parse.py
  5. #
  6. # MIT License
  7. #
  8. # Copyright (c) 2017-present Ofek Lev <oss@ofek.dev>
  9. #
  10. # Permission is hereby granted, free of charge, to any person obtaining a copy of this
  11. # software and associated documentation files (the "Software"), to deal in the Software
  12. # without restriction, including without limitation the rights to use, copy, modify,
  13. # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
  14. # permit persons to whom the Software is furnished to do so, subject to the following
  15. # conditions:
  16. #
  17. # The above copyright notice and this permission notice shall be included in all copies
  18. # or substantial portions of the Software.
  19. #
  20. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  21. # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  22. # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  23. # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  24. # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  25. # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. #
  27. #
  28. # With additional allowance of arbitrary `LicenseRef-` identifiers, not just
  29. # `LicenseRef-Public-Domain` and `LicenseRef-Proprietary`.
  30. #
  31. #######################################################################################
  32. from __future__ import annotations
  33. import re
  34. from typing import NewType, cast
  35. from ._spdx import EXCEPTIONS, LICENSES
  36. __all__ = [
  37. "InvalidLicenseExpression",
  38. "NormalizedLicenseExpression",
  39. "canonicalize_license_expression",
  40. ]
  41. license_ref_allowed = re.compile("^[A-Za-z0-9.-]*$")
  42. NormalizedLicenseExpression = NewType("NormalizedLicenseExpression", str)
  43. class InvalidLicenseExpression(ValueError):
  44. """Raised when a license-expression string is invalid
  45. >>> canonicalize_license_expression("invalid")
  46. Traceback (most recent call last):
  47. ...
  48. packaging.licenses.InvalidLicenseExpression: Invalid license expression: 'invalid'
  49. """
  50. def canonicalize_license_expression(
  51. raw_license_expression: str,
  52. ) -> NormalizedLicenseExpression:
  53. if not raw_license_expression:
  54. message = f"Invalid license expression: {raw_license_expression!r}"
  55. raise InvalidLicenseExpression(message)
  56. # Pad any parentheses so tokenization can be achieved by merely splitting on
  57. # whitespace.
  58. license_expression = raw_license_expression.replace("(", " ( ").replace(")", " ) ")
  59. licenseref_prefix = "LicenseRef-"
  60. license_refs = {
  61. ref.lower(): "LicenseRef-" + ref[len(licenseref_prefix) :]
  62. for ref in license_expression.split()
  63. if ref.lower().startswith(licenseref_prefix.lower())
  64. }
  65. # Normalize to lower case so we can look up licenses/exceptions
  66. # and so boolean operators are Python-compatible.
  67. license_expression = license_expression.lower()
  68. tokens = license_expression.split()
  69. # Rather than implementing a parenthesis/boolean logic parser, create an
  70. # expression that Python can parse. Everything that is not involved with the
  71. # grammar itself is replaced with the placeholder `False` and the resultant
  72. # expression should become a valid Python expression.
  73. python_tokens = []
  74. for token in tokens:
  75. if token not in {"or", "and", "with", "(", ")"}:
  76. python_tokens.append("False")
  77. elif token == "with":
  78. python_tokens.append("or")
  79. elif (
  80. token == "("
  81. and python_tokens
  82. and python_tokens[-1] not in {"or", "and", "("}
  83. ) or (token == ")" and python_tokens and python_tokens[-1] == "("):
  84. message = f"Invalid license expression: {raw_license_expression!r}"
  85. raise InvalidLicenseExpression(message)
  86. else:
  87. python_tokens.append(token)
  88. python_expression = " ".join(python_tokens)
  89. try:
  90. compile(python_expression, "", "eval")
  91. except SyntaxError:
  92. message = f"Invalid license expression: {raw_license_expression!r}"
  93. raise InvalidLicenseExpression(message) from None
  94. # Take a final pass to check for unknown licenses/exceptions.
  95. normalized_tokens = []
  96. for token in tokens:
  97. if token in {"or", "and", "with", "(", ")"}:
  98. normalized_tokens.append(token.upper())
  99. continue
  100. if normalized_tokens and normalized_tokens[-1] == "WITH":
  101. if token not in EXCEPTIONS:
  102. message = f"Unknown license exception: {token!r}"
  103. raise InvalidLicenseExpression(message)
  104. normalized_tokens.append(EXCEPTIONS[token]["id"])
  105. else:
  106. if token.endswith("+"):
  107. final_token = token[:-1]
  108. suffix = "+"
  109. else:
  110. final_token = token
  111. suffix = ""
  112. if final_token.startswith("licenseref-"):
  113. if not license_ref_allowed.match(final_token):
  114. message = f"Invalid licenseref: {final_token!r}"
  115. raise InvalidLicenseExpression(message)
  116. normalized_tokens.append(license_refs[final_token] + suffix)
  117. else:
  118. if final_token not in LICENSES:
  119. message = f"Unknown license: {final_token!r}"
  120. raise InvalidLicenseExpression(message)
  121. normalized_tokens.append(LICENSES[final_token]["id"] + suffix)
  122. normalized_expression = " ".join(normalized_tokens)
  123. return cast(
  124. "NormalizedLicenseExpression",
  125. normalized_expression.replace("( ", "(").replace(" )", ")"),
  126. )