__init__.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. # ___
  2. # \./ DANGER: This project implements some code generation
  3. # .--.O.--. techniques involving string concatenation.
  4. # \/ \/ If you look at it, you might die.
  5. #
  6. r"""
  7. Installation
  8. ************
  9. .. code-block:: bash
  10. pip install fastjsonschema
  11. Support only for Python 3.3 and higher.
  12. About
  13. *****
  14. ``fastjsonschema`` implements validation of JSON documents by JSON schema.
  15. The library implements JSON schema drafts 04, 06, and 07. The main purpose is
  16. to have a really fast implementation. See some numbers:
  17. * Probably the most popular, ``jsonschema``, can take up to 5 seconds for valid
  18. inputs and 1.2 seconds for invalid inputs.
  19. * Second most popular, ``json-spec``, is even worse with up to 7.2 and 1.7 seconds.
  20. * Last ``validictory``, now deprecated, is much better with 370 or 23 milliseconds,
  21. but it does not follow all standards, and it can be still slow for some purposes.
  22. With this library you can gain big improvements as ``fastjsonschema`` takes
  23. only about 25 milliseconds for valid inputs and 2 milliseconds for invalid ones.
  24. Pretty amazing, right? :-)
  25. Technically it works by generating the most stupid code on the fly, which is fast but
  26. is hard to write by hand. The best efficiency is achieved when a validator is compiled
  27. once and used many times, of course. It works similarly to regular expressions. But
  28. you can also generate the code to a file, which is even slightly faster.
  29. You can run the performance benchmarks on your computer or server with the included
  30. script:
  31. .. code-block:: bash
  32. $ make performance
  33. fast_compiled valid ==> 0.0993900
  34. fast_compiled invalid ==> 0.0041089
  35. fast_compiled_without_exc valid ==> 0.0465258
  36. fast_compiled_without_exc invalid ==> 0.0023688
  37. fast_file valid ==> 0.0989483
  38. fast_file invalid ==> 0.0041104
  39. fast_not_compiled valid ==> 11.9572681
  40. fast_not_compiled invalid ==> 2.9512092
  41. jsonschema valid ==> 5.2233240
  42. jsonschema invalid ==> 1.3227916
  43. jsonschema_compiled valid ==> 0.4447982
  44. jsonschema_compiled invalid ==> 0.0231333
  45. jsonspec valid ==> 4.1450569
  46. jsonspec invalid ==> 1.0485777
  47. validictory valid ==> 0.2730411
  48. validictory invalid ==> 0.0183669
  49. This library follows and implements `JSON schema draft-04, draft-06, and draft-07
  50. <http://json-schema.org>`_. Sometimes it's not perfectly clear, so I also recommend
  51. checking out `Understanding JSON Schema <https://spacetelescope.github.io/understanding-json-schema>`_.
  52. Note that there are some differences compared to JSON schema standard:
  53. * Regular expressions are full Python ones, not only what JSON schema allows. It's easier
  54. to allow everything, and also it's faster to compile without limits. So keep in mind that when
  55. you will use a more advanced regular expression, it may not work with other libraries or in
  56. other languages.
  57. * Because in Python a dollar in a regular expression also matches before a trailing new line
  58. (``a$`` matches both ``a`` and ``a\n``), ``\Z`` is used instead of ``$``, and all dollars in your
  59. regular expressions are changed to ``\Z`` as well. When you want to use a dollar as a regular character, you have to escape it (``\$``).
  60. * JSON schema says you can use keyword ``default`` for providing default values. This implementation
  61. uses that and always returns transformed input data.
  62. Usage
  63. *****
  64. .. code-block:: python
  65. import fastjsonschema
  66. point_schema = {
  67. "type": "object",
  68. "properties": {
  69. "x": {
  70. "type": "number",
  71. },
  72. "y": {
  73. "type": "number",
  74. },
  75. },
  76. "required": ["x", "y"],
  77. "additionalProperties": False,
  78. }
  79. point_validator = fastjsonschema.compile(point_schema)
  80. try:
  81. point_validator({"x": 1.0, "y": 2.0})
  82. except fastjsonschema.JsonSchemaException as e:
  83. print(f"Data failed validation: {e}")
  84. API
  85. ***
  86. """
  87. from functools import partial, update_wrapper
  88. from .draft04 import CodeGeneratorDraft04
  89. from .draft06 import CodeGeneratorDraft06
  90. from .draft07 import CodeGeneratorDraft07
  91. from .exceptions import JsonSchemaException, JsonSchemaValueException, JsonSchemaDefinitionException
  92. from .ref_resolver import RefResolver
  93. from .version import VERSION
# Public names of this package: the version string, the exception classes
# and the three entry-point functions defined below.
__all__ = (
    'VERSION',
    'JsonSchemaException',
    'JsonSchemaValueException',
    'JsonSchemaDefinitionException',
    'validate',
    'compile',
    'compile_to_code',
)
  103. def validate(definition, data, handlers={}, formats={}, use_default=True, use_formats=True, detailed_exceptions=True):
  104. """
  105. Validation function for lazy programmers or for use cases when you need
  106. to call validation only once, so you do not have to compile it first.
  107. Use it only when you do not care about performance (even though it will
  108. be still faster than alternative implementations).
  109. .. code-block:: python
  110. import fastjsonschema
  111. fastjsonschema.validate({'type': 'string'}, 'hello')
  112. # same as: compile({'type': 'string'})('hello')
  113. Preferred is to use :any:`compile` function.
  114. """
  115. return compile(definition, handlers, formats, use_default, use_formats, detailed_exceptions)(data)
  116. #TODO: Change use_default to False when upgrading to version 3.
  117. # pylint: disable=redefined-builtin,dangerous-default-value,exec-used
  118. def compile(definition, handlers={}, formats={}, use_default=True, use_formats=True, detailed_exceptions=True):
  119. """
  120. Generates validation function for validating JSON schema passed in ``definition``.
  121. Example:
  122. .. code-block:: python
  123. import fastjsonschema
  124. validate = fastjsonschema.compile({'type': 'string'})
  125. validate('hello')
  126. This implementation supports keyword ``default`` (can be turned off
  127. by passing `use_default=False`):
  128. .. code-block:: python
  129. validate = fastjsonschema.compile({
  130. 'type': 'object',
  131. 'properties': {
  132. 'a': {'type': 'number', 'default': 42},
  133. },
  134. })
  135. data = validate({})
  136. assert data == {'a': 42}
  137. Supported implementations are draft-04, draft-06 and draft-07. Which version
  138. should be used is determined by `$draft` in your ``definition``. When not
  139. specified, the latest implementation is used (draft-07).
  140. .. code-block:: python
  141. validate = fastjsonschema.compile({
  142. '$schema': 'http://json-schema.org/draft-04/schema',
  143. 'type': 'number',
  144. })
  145. You can pass mapping from URI to function that should be used to retrieve
  146. remote schemes used in your ``definition`` in parameter ``handlers``.
  147. Also, you can pass mapping for custom formats. Key is the name of your
  148. formatter and value can be regular expression, which will be compiled or
  149. callback returning `bool` (or you can raise your own exception).
  150. .. code-block:: python
  151. validate = fastjsonschema.compile(definition, formats={
  152. 'foo': r'foo|bar',
  153. 'bar': lambda value: value in ('foo', 'bar'),
  154. })
  155. Note that formats are automatically used as assertions. It can be turned
  156. off by passing `use_formats=False`. When disabled, custom formats are
  157. disabled as well. (Added in 2.19.0.)
  158. If you don't need detailed exceptions, you can turn the details off and gain
  159. additional performance by passing `detailed_exceptions=False`.
  160. Exception :any:`JsonSchemaDefinitionException` is raised when generating the
  161. code fails (bad definition).
  162. Exception :any:`JsonSchemaValueException` is raised from generated function when
  163. validation fails (data do not follow the definition).
  164. """
  165. resolver, code_generator = _factory(definition, handlers, formats, use_default, use_formats, detailed_exceptions)
  166. global_state = code_generator.global_state
  167. # Do not pass local state so it can recursively call itself.
  168. exec(code_generator.func_code, global_state)
  169. func = global_state[resolver.get_scope_name()]
  170. if formats:
  171. return update_wrapper(partial(func, custom_formats=formats), func)
  172. return func
  173. # pylint: disable=dangerous-default-value
  174. def compile_to_code(definition, handlers={}, formats={}, use_default=True, use_formats=True, detailed_exceptions=True):
  175. """
  176. Generates validation code for validating JSON schema passed in ``definition``.
  177. Example:
  178. .. code-block:: python
  179. import fastjsonschema
  180. code = fastjsonschema.compile_to_code({'type': 'string'})
  181. with open('your_file.py', 'w') as f:
  182. f.write(code)
  183. You can also use it as a script:
  184. .. code-block:: bash
  185. echo "{'type': 'string'}" | python3 -m fastjsonschema > your_file.py
  186. python3 -m fastjsonschema "{'type': 'string'}" > your_file.py
  187. Exception :any:`JsonSchemaDefinitionException` is raised when generating the
  188. code fails (bad definition).
  189. """
  190. _, code_generator = _factory(definition, handlers, formats, use_default, use_formats, detailed_exceptions)
  191. return (
  192. 'VERSION = "' + VERSION + '"\n' +
  193. code_generator.global_state_code + '\n' +
  194. code_generator.func_code
  195. )
  196. def _factory(definition, handlers, formats={}, use_default=True, use_formats=True, detailed_exceptions=True):
  197. resolver = RefResolver.from_schema(definition, handlers=handlers, store={})
  198. code_generator = _get_code_generator_class(definition)(
  199. definition,
  200. resolver=resolver,
  201. formats=formats,
  202. use_default=use_default,
  203. use_formats=use_formats,
  204. detailed_exceptions=detailed_exceptions,
  205. )
  206. return resolver, code_generator
  207. def _get_code_generator_class(schema):
  208. # Schema in from draft-06 can be just the boolean value.
  209. if isinstance(schema, dict):
  210. schema_version = schema.get('$schema', '')
  211. if 'draft-04' in schema_version:
  212. return CodeGeneratorDraft04
  213. if 'draft-06' in schema_version:
  214. return CodeGeneratorDraft06
  215. return CodeGeneratorDraft07