jsonutil.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. """Utilities to manipulate JSON objects."""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. import math
  5. import numbers
  6. import re
  7. import types
  8. from binascii import b2a_base64
  9. from datetime import date, datetime
  10. from jupyter_client._version import version_info as jupyter_client_version
  11. next_attr_name = "__next__"
  12. # -----------------------------------------------------------------------------
  13. # Globals and constants
  14. # -----------------------------------------------------------------------------
  15. # timestamp formats
  16. ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
  17. ISO8601_PAT = re.compile(
  18. r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
  19. )
  20. # holy crap, strptime is not threadsafe.
  21. # Calling it once at import seems to help.
  22. datetime.strptime("2000-01-01", "%Y-%m-%d")
  23. # -----------------------------------------------------------------------------
  24. # Classes and functions
  25. # -----------------------------------------------------------------------------
  26. # constants for identifying png/jpeg data
  27. PNG = b"\x89PNG\r\n\x1a\n"
  28. # front of PNG base64-encoded
  29. PNG64 = b"iVBORw0KG"
  30. JPEG = b"\xff\xd8"
  31. # front of JPEG base64-encoded
  32. JPEG64 = b"/9"
  33. # constants for identifying gif data
  34. GIF_64 = b"R0lGODdh"
  35. GIF89_64 = b"R0lGODlh"
  36. # front of PDF base64-encoded
  37. PDF64 = b"JVBER"
  38. JUPYTER_CLIENT_MAJOR_VERSION = jupyter_client_version[0]
  39. def encode_images(format_dict):
  40. """b64-encodes images in a displaypub format dict
  41. Perhaps this should be handled in json_clean itself?
  42. Parameters
  43. ----------
  44. format_dict : dict
  45. A dictionary of display data keyed by mime-type
  46. Returns
  47. -------
  48. format_dict : dict
  49. A copy of the same dictionary,
  50. but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
  51. is base64-encoded.
  52. """
  53. # no need for handling of ambiguous bytestrings on Python 3,
  54. # where bytes objects always represent binary data and thus
  55. # base64-encoded.
  56. return format_dict
  57. def json_clean(obj): # pragma: no cover
  58. """Deprecated, this is a no-op for jupyter-client>=7.
  59. Clean an object to ensure it's safe to encode in JSON.
  60. Atomic, immutable objects are returned unmodified. Sets and tuples are
  61. converted to lists, lists are copied and dicts are also copied.
  62. Note: dicts whose keys could cause collisions upon encoding (such as a dict
  63. with both the number 1 and the string '1' as keys) will cause a ValueError
  64. to be raised.
  65. Parameters
  66. ----------
  67. obj : any python object
  68. Returns
  69. -------
  70. out : object
  71. A version of the input which will not cause an encoding error when
  72. encoded as JSON. Note that this function does not *encode* its inputs,
  73. it simply sanitizes it so that there will be no encoding errors later.
  74. """
  75. if int(JUPYTER_CLIENT_MAJOR_VERSION) >= 7:
  76. return obj
  77. # types that are 'atomic' and ok in json as-is.
  78. atomic_ok = (str, type(None))
  79. # containers that we need to convert into lists
  80. container_to_list = (tuple, set, types.GeneratorType)
  81. # Since bools are a subtype of Integrals, which are a subtype of Reals,
  82. # we have to check them in that order.
  83. if isinstance(obj, bool):
  84. return obj
  85. if isinstance(obj, numbers.Integral):
  86. # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
  87. return int(obj)
  88. if isinstance(obj, numbers.Real):
  89. # cast out-of-range floats to their reprs
  90. if math.isnan(obj) or math.isinf(obj):
  91. return repr(obj)
  92. return float(obj)
  93. if isinstance(obj, atomic_ok):
  94. return obj
  95. if isinstance(obj, bytes):
  96. # unanmbiguous binary data is base64-encoded
  97. # (this probably should have happened upstream)
  98. return b2a_base64(obj).decode("ascii")
  99. if isinstance(obj, container_to_list) or (
  100. hasattr(obj, "__iter__") and hasattr(obj, next_attr_name)
  101. ):
  102. obj = list(obj)
  103. if isinstance(obj, list):
  104. return [json_clean(x) for x in obj]
  105. if isinstance(obj, dict):
  106. # First, validate that the dict won't lose data in conversion due to
  107. # key collisions after stringification. This can happen with keys like
  108. # True and 'true' or 1 and '1', which collide in JSON.
  109. nkeys = len(obj)
  110. nkeys_collapsed = len(set(map(str, obj)))
  111. if nkeys != nkeys_collapsed:
  112. msg = (
  113. "dict cannot be safely converted to JSON: "
  114. "key collision would lead to dropped values"
  115. )
  116. raise ValueError(msg)
  117. # If all OK, proceed by making the new dict that will be json-safe
  118. out = {}
  119. for k, v in obj.items():
  120. out[str(k)] = json_clean(v)
  121. return out
  122. if isinstance(obj, datetime | date):
  123. return obj.strftime(ISO8601)
  124. # we don't understand it, it's probably an unserializable object
  125. raise ValueError("Can't clean for JSON: %r" % obj)