rwbase.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. """Base classes and utilities for readers and writers."""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from __future__ import annotations
  5. from base64 import decodebytes, encodebytes
  6. def restore_bytes(nb):
  7. """Restore bytes of image data from unicode-only formats.
  8. Base64 encoding is handled elsewhere. Bytes objects in the notebook are
  9. always b64-encoded. We DO NOT encode/decode around file formats.
  10. Note: this is never used
  11. """
  12. for ws in nb.worksheets:
  13. for cell in ws.cells:
  14. if cell.cell_type == "code":
  15. for output in cell.outputs:
  16. if "png" in output:
  17. output.png = output.png.encode("ascii", "replace")
  18. if "jpeg" in output:
  19. output.jpeg = output.jpeg.encode("ascii", "replace")
  20. return nb
  21. # output keys that are likely to have multiline values
  22. _multiline_outputs = ["text", "html", "svg", "latex", "javascript", "json"]
  23. # FIXME: workaround for old splitlines()
  24. def _join_lines(lines):
  25. """join lines that have been written by splitlines()
  26. Has logic to protect against `splitlines()`, which
  27. should have been `splitlines(True)`
  28. """
  29. if lines and lines[0].endswith(("\n", "\r")):
  30. # created by splitlines(True)
  31. return "".join(lines)
  32. # created by splitlines()
  33. return "\n".join(lines)
  34. def rejoin_lines(nb):
  35. """rejoin multiline text into strings
  36. For reversing effects of ``split_lines(nb)``.
  37. This only rejoins lines that have been split, so if text objects were not split
  38. they will pass through unchanged.
  39. Used when reading JSON files that may have been passed through split_lines.
  40. """
  41. for ws in nb.worksheets:
  42. for cell in ws.cells:
  43. if cell.cell_type == "code":
  44. if "input" in cell and isinstance(cell.input, list):
  45. cell.input = _join_lines(cell.input)
  46. for output in cell.outputs:
  47. for key in _multiline_outputs:
  48. item = output.get(key, None)
  49. if isinstance(item, list):
  50. output[key] = _join_lines(item)
  51. else: # text, heading cell
  52. for key in ["source", "rendered"]:
  53. item = cell.get(key, None)
  54. if isinstance(item, list):
  55. cell[key] = _join_lines(item)
  56. return nb
  57. def split_lines(nb):
  58. """split likely multiline text into lists of strings
  59. For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
  60. reverse the effects of ``split_lines(nb)``.
  61. Used when writing JSON files.
  62. """
  63. for ws in nb.worksheets:
  64. for cell in ws.cells:
  65. if cell.cell_type == "code":
  66. if "input" in cell and isinstance(cell.input, str):
  67. cell.input = cell.input.splitlines(True)
  68. for output in cell.outputs:
  69. for key in _multiline_outputs:
  70. item = output.get(key, None)
  71. if isinstance(item, str):
  72. output[key] = item.splitlines(True)
  73. else: # text, heading cell
  74. for key in ["source", "rendered"]:
  75. item = cell.get(key, None)
  76. if isinstance(item, str):
  77. cell[key] = item.splitlines(True)
  78. return nb
  79. # b64 encode/decode are never actually used, because all bytes objects in
  80. # the notebook are already b64-encoded, and we don't need/want to double-encode
  81. def base64_decode(nb):
  82. """Restore all bytes objects in the notebook from base64-encoded strings.
  83. Note: This is never used
  84. """
  85. for ws in nb.worksheets:
  86. for cell in ws.cells:
  87. if cell.cell_type == "code":
  88. for output in cell.outputs:
  89. if "png" in output:
  90. if isinstance(output.png, str):
  91. output.png = output.png.encode("ascii")
  92. output.png = decodebytes(output.png)
  93. if "jpeg" in output:
  94. if isinstance(output.jpeg, str):
  95. output.jpeg = output.jpeg.encode("ascii")
  96. output.jpeg = decodebytes(output.jpeg)
  97. return nb
  98. def base64_encode(nb):
  99. """Base64 encode all bytes objects in the notebook.
  100. These will be b64-encoded unicode strings
  101. Note: This is never used
  102. """
  103. for ws in nb.worksheets:
  104. for cell in ws.cells:
  105. if cell.cell_type == "code":
  106. for output in cell.outputs:
  107. if "png" in output:
  108. output.png = encodebytes(output.png).decode("ascii")
  109. if "jpeg" in output:
  110. output.jpeg = encodebytes(output.jpeg).decode("ascii")
  111. return nb
  112. def strip_transient(nb):
  113. """Strip transient values that shouldn't be stored in files.
  114. This should be called in *both* read and write.
  115. """
  116. nb.pop("orig_nbformat", None)
  117. nb.pop("orig_nbformat_minor", None)
  118. for ws in nb["worksheets"]:
  119. for cell in ws["cells"]:
  120. cell.get("metadata", {}).pop("trusted", None)
  121. # strip cell.trusted even though it shouldn't be used,
  122. # since it's where the transient value used to be stored.
  123. cell.pop("trusted", None)
  124. return nb
  125. class NotebookReader:
  126. """A class for reading notebooks."""
  127. def reads(self, s, **kwargs):
  128. """Read a notebook from a string."""
  129. msg = "loads must be implemented in a subclass"
  130. raise NotImplementedError(msg)
  131. def read(self, fp, **kwargs):
  132. """Read a notebook from a file like object"""
  133. nbs = fp.read()
  134. return self.reads(nbs, **kwargs)
  135. class NotebookWriter:
  136. """A class for writing notebooks."""
  137. def writes(self, nb, **kwargs):
  138. """Write a notebook to a string."""
  139. msg = "loads must be implemented in a subclass"
  140. raise NotImplementedError(msg)
  141. def write(self, nb, fp, **kwargs):
  142. """Write a notebook to a file like object"""
  143. nbs = self.writes(nb, **kwargs)
  144. return fp.write(nbs)