| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- #######################################################################################
- #
- # Adapted from:
- # https://github.com/pypa/hatch/blob/5352e44/backend/src/hatchling/licenses/parse.py
- #
- # MIT License
- #
- # Copyright (c) 2017-present Ofek Lev <oss@ofek.dev>
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy of this
- # software and associated documentation files (the "Software"), to deal in the Software
- # without restriction, including without limitation the rights to use, copy, modify,
- # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
- # permit persons to whom the Software is furnished to do so, subject to the following
- # conditions:
- #
- # The above copyright notice and this permission notice shall be included in all copies
- # or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
- # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
- # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #
- #
- # With additional allowance of arbitrary `LicenseRef-` identifiers, not just
- # `LicenseRef-Public-Domain` and `LicenseRef-Proprietary`.
- #
- #######################################################################################
- from __future__ import annotations
- import re
- from typing import NewType, cast
- from ._spdx import EXCEPTIONS, LICENSES
# Names that make up the public API of this module.
__all__ = [
    "InvalidLicenseExpression",
    "NormalizedLicenseExpression",
    "canonicalize_license_expression",
]

# Matches a string made up solely of ASCII letters, digits, "." and "-".
# It is applied to a complete ``LicenseRef-`` token, prefix included; note
# that ``*`` also lets the empty string through.
license_ref_allowed = re.compile("^[A-Za-z0-9.-]*$")

# Distinct static type for the strings returned by
# ``canonicalize_license_expression``; at runtime it is a plain ``str``.
NormalizedLicenseExpression = NewType("NormalizedLicenseExpression", str)
class InvalidLicenseExpression(ValueError):
    """Raised when a license-expression string is invalid.

    A single unrecognised word is a well-formed expression, so it is reported
    as an unknown license rather than as a malformed expression:

    >>> canonicalize_license_expression("invalid")
    Traceback (most recent call last):
        ...
    packaging.licenses.InvalidLicenseExpression: Unknown license: 'invalid'
    """
def canonicalize_license_expression(
    raw_license_expression: str,
) -> NormalizedLicenseExpression:
    """Validate a license expression and return it in canonical form.

    Operators are upper-cased, license and exception identifiers are replaced
    by the ``"id"`` stored for them in ``LICENSES`` / ``EXCEPTIONS``, the
    ``LicenseRef-`` prefix is normalized while the rest of such an identifier
    keeps the casing it was written with, and the padding inside parentheses
    is removed.

    :param raw_license_expression: The expression to canonicalize.
    :returns: The canonical form of the expression.
    :raises InvalidLicenseExpression: If the expression is empty or not
        well-formed, or if it contains an unknown license, an unknown license
        exception or an invalid ``LicenseRef-`` identifier (including one
        followed by ``+``, which the SPDX grammar only allows on license ids).
    """
    if not raw_license_expression:
        message = f"Invalid license expression: {raw_license_expression!r}"
        raise InvalidLicenseExpression(message)

    grammar_tokens = {"or", "and", "with", "(", ")"}
    licenseref_prefix = "licenseref-"

    # Pad any parentheses so tokenization can be achieved by merely splitting on
    # whitespace.
    padded = raw_license_expression.replace("(", " ( ").replace(")", " ) ")

    # Keep every token as written next to its lower-case form: the lower-case
    # form drives lookups and operator handling, the original spelling is what
    # a LicenseRef is emitted with. Pairing them per occurrence (instead of
    # through a mapping keyed by the lower-case token) lets two references
    # that differ only in case each keep their own spelling.
    raw_tokens = padded.split()
    tokens = [raw_token.lower() for raw_token in raw_tokens]

    # Rather than implementing a parenthesis/boolean logic parser, create an
    # expression that Python can parse. Everything that is not involved with the
    # grammar itself is replaced with the placeholder `False` and the resultant
    # expression should become a valid Python expression. Only placeholders,
    # operators and parentheses reach `compile`, and nothing is ever evaluated.
    python_tokens: list[str] = []
    for token in tokens:
        if token not in grammar_tokens:
            python_tokens.append("False")
        elif token == "with":
            python_tokens.append("or")
        elif (
            token == "("
            and python_tokens
            and python_tokens[-1] not in {"or", "and", "("}
        ) or (token == ")" and python_tokens and python_tokens[-1] == "("):
            # Python would happily parse `False ( False )` as a call and `( )`
            # as an empty tuple, so both shapes are rejected by hand.
            message = f"Invalid license expression: {raw_license_expression!r}"
            raise InvalidLicenseExpression(message)
        else:
            python_tokens.append(token)

    try:
        compile(" ".join(python_tokens), "", "eval")
    except SyntaxError:
        message = f"Invalid license expression: {raw_license_expression!r}"
        raise InvalidLicenseExpression(message) from None

    # Take a final pass to check for unknown licenses/exceptions.
    normalized_tokens: list[str] = []
    for raw_token, token in zip(raw_tokens, tokens):
        if token in grammar_tokens:
            normalized_tokens.append(token.upper())
        elif normalized_tokens and normalized_tokens[-1] == "WITH":
            # Whatever directly follows WITH must be a known exception.
            if token not in EXCEPTIONS:
                message = f"Unknown license exception: {token!r}"
                raise InvalidLicenseExpression(message)
            normalized_tokens.append(EXCEPTIONS[token]["id"])
        elif token.startswith(licenseref_prefix):
            # A trailing "+" is deliberately not stripped here: it is not
            # permitted on a LicenseRef, and leaving it in place makes the
            # pattern reject it. The pattern is applied to the text as
            # written because that is the text that ends up in the result.
            if not license_ref_allowed.match(raw_token):
                message = f"Invalid licenseref: {token!r}"
                raise InvalidLicenseExpression(message)
            normalized_tokens.append(
                "LicenseRef-" + raw_token[len(licenseref_prefix) :]
            )
        else:
            # A trailing "+" ("or later") is carried over to the canonical id.
            if token.endswith("+"):
                license_key = token[:-1]
                suffix = "+"
            else:
                license_key = token
                suffix = ""
            if license_key not in LICENSES:
                message = f"Unknown license: {license_key!r}"
                raise InvalidLicenseExpression(message)
            normalized_tokens.append(LICENSES[license_key]["id"] + suffix)

    normalized_expression = " ".join(normalized_tokens)

    # Undo the padding that was added around parentheses for tokenization.
    return cast(
        "NormalizedLicenseExpression",
        normalized_expression.replace("( ", "(").replace(" )", ")"),
    )
|