jsonutil.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. """Utilities to manipulate JSON objects."""
  2. # NOTE: this is a copy of ipykernel/jsonutils.py (+blackified)
  3. # Copyright (c) IPython Development Team.
  4. # Distributed under the terms of the Modified BSD License.
  5. from __future__ import annotations
  6. import math
  7. import numbers
  8. import re
  9. import types
  10. from binascii import b2a_base64
  11. from datetime import datetime
  12. from typing import Any
  13. # -----------------------------------------------------------------------------
  14. # Globals and constants
  15. # -----------------------------------------------------------------------------
  16. # timestamp formats
  17. ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
  18. ISO8601_PAT = re.compile(
  19. r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
  20. )
  21. # holy crap, strptime is not threadsafe.
  22. # Calling it once at import seems to help.
  23. datetime.strptime("2000-01-01", "%Y-%m-%d")
  24. # -----------------------------------------------------------------------------
  25. # Classes and functions
  26. # -----------------------------------------------------------------------------
  27. # constants for identifying png/jpeg data
  28. PNG = b"\x89PNG\r\n\x1a\n"
  29. # front of PNG base64-encoded
  30. PNG64 = b"iVBORw0KG"
  31. JPEG = b"\xff\xd8"
  32. # front of JPEG base64-encoded
  33. JPEG64 = b"/9"
  34. # constants for identifying gif data
  35. GIF_64 = b"R0lGODdh"
  36. GIF89_64 = b"R0lGODlh"
  37. # front of PDF base64-encoded
  38. PDF64 = b"JVBER"
  39. def encode_images(format_dict: dict[str, str]) -> dict[str, str]:
  40. """b64-encodes images in a displaypub format dict
  41. Perhaps this should be handled in json_clean itself?
  42. Parameters
  43. ----------
  44. format_dict : dict
  45. A dictionary of display data keyed by mime-type
  46. Returns
  47. -------
  48. format_dict : dict
  49. A copy of the same dictionary,
  50. but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
  51. is base64-encoded.
  52. """
  53. return format_dict
  54. def json_clean(obj: Any) -> Any:
  55. """Clean an object to ensure it's safe to encode in JSON.
  56. Atomic, immutable objects are returned unmodified. Sets and tuples are
  57. converted to lists, lists are copied and dicts are also copied.
  58. Note: dicts whose keys could cause collisions upon encoding (such as a dict
  59. with both the number 1 and the string '1' as keys) will cause a ValueError
  60. to be raised.
  61. Parameters
  62. ----------
  63. obj : any python object
  64. Returns
  65. -------
  66. out : object
  67. A version of the input which will not cause an encoding error when
  68. encoded as JSON. Note that this function does not *encode* its inputs,
  69. it simply sanitizes it so that there will be no encoding errors later.
  70. """
  71. # types that are 'atomic' and ok in json as-is.
  72. atomic_ok = (str, type(None))
  73. # containers that we need to convert into lists
  74. container_to_list = (tuple, set, types.GeneratorType)
  75. # Since bools are a subtype of Integrals, which are a subtype of Reals,
  76. # we have to check them in that order.
  77. if isinstance(obj, bool):
  78. return obj
  79. if isinstance(obj, numbers.Integral):
  80. # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
  81. return int(obj)
  82. if isinstance(obj, numbers.Real):
  83. # cast out-of-range floats to their reprs
  84. if math.isnan(obj) or math.isinf(obj):
  85. return repr(obj)
  86. return float(obj)
  87. if isinstance(obj, atomic_ok):
  88. return obj
  89. if isinstance(obj, bytes):
  90. return b2a_base64(obj).decode("ascii")
  91. if isinstance(obj, container_to_list) or (
  92. hasattr(obj, "__iter__") and hasattr(obj, "__next__")
  93. ):
  94. obj = list(obj)
  95. if isinstance(obj, list):
  96. return [json_clean(x) for x in obj]
  97. if isinstance(obj, dict):
  98. # First, validate that the dict won't lose data in conversion due to
  99. # key collisions after stringification. This can happen with keys like
  100. # True and 'true' or 1 and '1', which collide in JSON.
  101. nkeys = len(obj)
  102. nkeys_collapsed = len(set(map(str, obj)))
  103. if nkeys != nkeys_collapsed:
  104. raise ValueError(
  105. "dict cannot be safely converted to JSON: "
  106. "key collision would lead to dropped values"
  107. )
  108. # If all OK, proceed by making the new dict that will be json-safe
  109. out = {}
  110. for k, v in iter(obj.items()):
  111. out[str(k)] = json_clean(v)
  112. return out
  113. if isinstance(obj, datetime):
  114. return obj.strftime(ISO8601)
  115. # we don't understand it, it's probably an unserializable object
  116. raise ValueError("Can't clean for JSON: %r" % obj)