schema.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. """Event schema objects."""
  2. from __future__ import annotations
  3. import json
  4. from pathlib import Path, PurePath
  5. from typing import Any, Union
  6. from jsonschema import FormatChecker, validators
  7. from referencing import Registry
  8. from referencing.jsonschema import DRAFT7
  9. try:
  10. from jsonschema.protocols import Validator
  11. except ImportError:
  12. Validator = Any # type:ignore[assignment, misc]
  13. from . import yaml
  14. from .validators import draft7_format_checker, validate_schema
  15. class EventSchemaUnrecognized(Exception):
  16. """An error for an unrecognized event schema."""
  17. class EventSchemaLoadingError(Exception):
  18. """An error for an event schema loading error."""
  19. class EventSchemaFileAbsent(Exception):
  20. """An error for an absent event schema file."""
  21. SchemaType = Union[dict[str, Any], str, PurePath]
  22. class EventSchema:
  23. """A validated schema that can be used.
  24. On instantiation, validate the schema against
  25. Jupyter Event's metaschema.
  26. Parameters
  27. ----------
  28. schema: dict or str
  29. JSON schema to validate against Jupyter Events.
  30. validator_class: jsonschema.validators
  31. The validator class from jsonschema used to validate instances
  32. of this event schema. The schema itself will be validated
  33. against Jupyter Event's metaschema to ensure that
  34. any schema registered here follows the expected form
  35. of Jupyter Events.
  36. registry:
  37. Registry for nested JSON schema references.
  38. """
  39. def __init__(
  40. self,
  41. schema: SchemaType,
  42. validator_class: type[Validator] = validators.Draft7Validator, # type:ignore[assignment]
  43. format_checker: FormatChecker = draft7_format_checker,
  44. registry: Registry[Any] | None = None,
  45. ):
  46. """Initialize an event schema."""
  47. _schema = self._load_schema(schema)
  48. # Validate the schema against Jupyter Events metaschema.
  49. validate_schema(_schema)
  50. if registry is None:
  51. registry = DRAFT7.create_resource(_schema) @ Registry()
  52. # Create a validator for this schema
  53. self._validator = validator_class(_schema, registry=registry, format_checker=format_checker) # type: ignore[call-arg]
  54. self._schema = _schema
  55. def __repr__(self) -> str:
  56. """A string repr for an event schema."""
  57. return json.dumps(self._schema, indent=2)
  58. @staticmethod
  59. def _ensure_yaml_loaded(schema: SchemaType, was_str: bool = False) -> None:
  60. """Ensures schema was correctly loaded into a dictionary. Raises
  61. EventSchemaLoadingError otherwise."""
  62. if isinstance(schema, dict):
  63. return
  64. error_msg = "Could not deserialize schema into a dictionary."
  65. def intended_as_path(schema: str) -> bool:
  66. path = Path(schema)
  67. return path.match("*.yml") or path.match("*.yaml") or path.match("*.json")
  68. # detect whether the user specified a string but intended a PurePath to
  69. # generate a more helpful error message
  70. if was_str and intended_as_path(schema): # type:ignore[arg-type]
  71. error_msg += " Paths to schema files must be explicitly wrapped in a Pathlib object."
  72. else:
  73. error_msg += " Double check the schema and ensure it is in the proper form."
  74. raise EventSchemaLoadingError(error_msg)
  75. @staticmethod
  76. def _load_schema(schema: SchemaType) -> dict[str, Any]:
  77. """Load a JSON schema from different sources/data types.
  78. `schema` could be a dictionary or serialized string representing the
  79. schema itself or a Pathlib object representing a schema file on disk.
  80. Returns a dictionary with schema data.
  81. """
  82. # if schema is already a dictionary, return it
  83. if isinstance(schema, dict):
  84. return schema
  85. # if schema is PurePath, ensure file exists at path and then load from file
  86. if isinstance(schema, PurePath):
  87. if not Path(schema).exists():
  88. msg = f'Schema file not present at path "{schema}".'
  89. raise EventSchemaFileAbsent(msg)
  90. loaded_schema = yaml.load(schema)
  91. EventSchema._ensure_yaml_loaded(loaded_schema)
  92. return loaded_schema # type:ignore[no-any-return]
  93. # finally, if schema is string, attempt to deserialize and return the output
  94. if isinstance(schema, str):
  95. # note the diff b/w load v.s. loads
  96. loaded_schema = yaml.loads(schema)
  97. EventSchema._ensure_yaml_loaded(loaded_schema, was_str=True)
  98. return loaded_schema # type:ignore[no-any-return]
  99. msg = f"Expected a dictionary, string, or PurePath, but instead received {schema.__class__.__name__}." # type:ignore[unreachable]
  100. raise EventSchemaUnrecognized(msg)
  101. @property
  102. def id(self) -> str:
  103. """Schema $id field."""
  104. return self._schema["$id"] # type:ignore[no-any-return]
  105. @property
  106. def version(self) -> int:
  107. """Schema's version."""
  108. return self._schema["version"] # type:ignore[no-any-return]
  109. @property
  110. def properties(self) -> dict[str, Any]:
  111. return self._schema["properties"] # type:ignore[no-any-return]
  112. def validate(self, data: dict[str, Any]) -> None:
  113. """Validate an incoming instance of this event schema."""
  114. self._validator.validate(data)