convert.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. """Code for converting notebooks to and from v3."""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from __future__ import annotations
  5. import json
  6. import re
  7. from traitlets.log import get_logger
  8. from nbformat import v3, validator
  9. from nbformat.corpus.words import generate_corpus_id as random_cell_id
  10. from nbformat.notebooknode import NotebookNode
  11. from .nbbase import nbformat, nbformat_minor
  12. def _warn_if_invalid(nb, version):
  13. """Log validation errors, if there are any."""
  14. from nbformat import ValidationError, validate
  15. try:
  16. validate(nb, version=version)
  17. except ValidationError as e:
  18. get_logger().error("Notebook JSON is not valid v%i: %s", version, e)
  19. def upgrade(nb, from_version=None, from_minor=None):
  20. """Convert a notebook to latest v4.
  21. Parameters
  22. ----------
  23. nb : NotebookNode
  24. The Python representation of the notebook to convert.
  25. from_version : int
  26. The original version of the notebook to convert.
  27. from_minor : int
  28. The original minor version of the notebook to convert (only relevant for v >= 3).
  29. """
  30. if not from_version:
  31. from_version = nb["nbformat"]
  32. if not from_minor:
  33. if "nbformat_minor" not in nb:
  34. if from_version == 4:
  35. msg = "The v4 notebook does not include the nbformat minor, which is needed."
  36. raise validator.ValidationError(msg)
  37. from_minor = 0
  38. else:
  39. from_minor = nb["nbformat_minor"]
  40. if from_version == 3:
  41. # Validate the notebook before conversion
  42. _warn_if_invalid(nb, from_version)
  43. # Mark the original nbformat so consumers know it has been converted
  44. orig_nbformat = nb.pop("orig_nbformat", None)
  45. orig_nbformat_minor = nb.pop("orig_nbformat_minor", None)
  46. nb.metadata.orig_nbformat = orig_nbformat or 3
  47. nb.metadata.orig_nbformat_minor = orig_nbformat_minor or 0
  48. # Mark the new format
  49. nb.nbformat = nbformat
  50. nb.nbformat_minor = nbformat_minor
  51. # remove worksheet(s)
  52. nb["cells"] = cells = []
  53. # In the unlikely event of multiple worksheets,
  54. # they will be flattened
  55. for ws in nb.pop("worksheets", []):
  56. # upgrade each cell
  57. for cell in ws["cells"]:
  58. cells.append(upgrade_cell(cell))
  59. # upgrade metadata
  60. nb.metadata.pop("name", "")
  61. nb.metadata.pop("signature", "")
  62. # Validate the converted notebook before returning it
  63. _warn_if_invalid(nb, nbformat)
  64. return nb
  65. if from_version == 4:
  66. if from_minor == nbformat_minor:
  67. return nb
  68. # other versions migration code e.g.
  69. # if from_minor < 3:
  70. # if from_minor < 4:
  71. if from_minor < 5:
  72. for cell in nb.cells:
  73. cell.id = random_cell_id()
  74. nb.metadata.orig_nbformat_minor = from_minor
  75. nb.nbformat_minor = nbformat_minor
  76. return nb
  77. raise ValueError(
  78. "Cannot convert a notebook directly from v%s to v4. "
  79. "Try using the nbformat.convert module." % from_version
  80. )
  81. def upgrade_cell(cell):
  82. """upgrade a cell from v3 to v4
  83. heading cell:
  84. - -> markdown heading
  85. code cell:
  86. - remove language metadata
  87. - cell.input -> cell.source
  88. - cell.prompt_number -> cell.execution_count
  89. - update outputs
  90. """
  91. cell.setdefault("metadata", NotebookNode())
  92. cell.id = random_cell_id()
  93. if cell.cell_type == "code":
  94. cell.pop("language", "")
  95. if "collapsed" in cell:
  96. cell.metadata["collapsed"] = cell.pop("collapsed")
  97. cell.source = cell.pop("input", "")
  98. cell.execution_count = cell.pop("prompt_number", None)
  99. cell.outputs = upgrade_outputs(cell.outputs)
  100. elif cell.cell_type == "heading":
  101. cell.cell_type = "markdown"
  102. level = cell.pop("level", 1)
  103. cell.source = "{hashes} {single_line}".format(
  104. hashes="#" * level,
  105. single_line=" ".join(cell.get("source", "").splitlines()),
  106. )
  107. elif cell.cell_type == "html":
  108. # Technically, this exists. It will never happen in practice.
  109. cell.cell_type = "markdown"
  110. return cell
  111. def downgrade_cell(cell):
  112. """downgrade a cell from v4 to v3
  113. code cell:
  114. - set cell.language
  115. - cell.input <- cell.source
  116. - cell.prompt_number <- cell.execution_count
  117. - update outputs
  118. markdown cell:
  119. - single-line heading -> heading cell
  120. """
  121. if cell.cell_type == "code":
  122. cell.language = "python"
  123. cell.input = cell.pop("source", "")
  124. cell.prompt_number = cell.pop("execution_count", None)
  125. cell.collapsed = cell.metadata.pop("collapsed", False)
  126. cell.outputs = downgrade_outputs(cell.outputs)
  127. elif cell.cell_type == "markdown":
  128. source = cell.get("source", "")
  129. if "\n" not in source and source.startswith("#"):
  130. match = re.match(r"(#+)\s*(.*)", source)
  131. assert match is not None
  132. prefix, text = match.groups()
  133. cell.cell_type = "heading"
  134. cell.source = text
  135. cell.level = len(prefix)
  136. cell.pop("id", None)
  137. cell.pop("attachments", None)
  138. return cell
  139. _mime_map = {
  140. "text": "text/plain",
  141. "html": "text/html",
  142. "svg": "image/svg+xml",
  143. "png": "image/png",
  144. "jpeg": "image/jpeg",
  145. "latex": "text/latex",
  146. "json": "application/json",
  147. "javascript": "application/javascript",
  148. }
  149. def to_mime_key(d):
  150. """convert dict with v3 aliases to plain mime-type keys"""
  151. for alias, mime in _mime_map.items():
  152. if alias in d:
  153. d[mime] = d.pop(alias)
  154. return d
  155. def from_mime_key(d):
  156. """convert dict with mime-type keys to v3 aliases"""
  157. d2 = {}
  158. for alias, mime in _mime_map.items():
  159. if mime in d:
  160. d2[alias] = d[mime]
  161. return d2
  162. def upgrade_output(output):
  163. """upgrade a single code cell output from v3 to v4
  164. - pyout -> execute_result
  165. - pyerr -> error
  166. - output.type -> output.data.mime/type
  167. - mime-type keys
  168. - stream.stream -> stream.name
  169. """
  170. if output["output_type"] in {"pyout", "display_data"}:
  171. output.setdefault("metadata", NotebookNode())
  172. if output["output_type"] == "pyout":
  173. output["output_type"] = "execute_result"
  174. output["execution_count"] = output.pop("prompt_number", None)
  175. # move output data into data sub-dict
  176. data = {}
  177. for key in list(output):
  178. if key in {"output_type", "execution_count", "metadata"}:
  179. continue
  180. data[key] = output.pop(key)
  181. to_mime_key(data)
  182. output["data"] = data
  183. to_mime_key(output.metadata)
  184. if "application/json" in data:
  185. data["application/json"] = json.loads(data["application/json"])
  186. # promote ascii bytes (from v2) to unicode
  187. for key in ("image/png", "image/jpeg"):
  188. if key in data and isinstance(data[key], bytes):
  189. data[key] = data[key].decode("ascii")
  190. elif output["output_type"] == "pyerr":
  191. output["output_type"] = "error"
  192. elif output["output_type"] == "stream":
  193. output["name"] = output.pop("stream", "stdout")
  194. return output
  195. def downgrade_output(output):
  196. """downgrade a single code cell output to v3 from v4
  197. - pyout <- execute_result
  198. - pyerr <- error
  199. - output.data.mime/type -> output.type
  200. - un-mime-type keys
  201. - stream.stream <- stream.name
  202. """
  203. if output["output_type"] in {"execute_result", "display_data"}:
  204. if output["output_type"] == "execute_result":
  205. output["output_type"] = "pyout"
  206. output["prompt_number"] = output.pop("execution_count", None)
  207. # promote data dict to top-level output namespace
  208. data = output.pop("data", {})
  209. if "application/json" in data:
  210. data["application/json"] = json.dumps(data["application/json"])
  211. data = from_mime_key(data)
  212. output.update(data)
  213. from_mime_key(output.get("metadata", {}))
  214. elif output["output_type"] == "error":
  215. output["output_type"] = "pyerr"
  216. elif output["output_type"] == "stream":
  217. output["stream"] = output.pop("name")
  218. return output
  219. def upgrade_outputs(outputs):
  220. """upgrade outputs of a code cell from v3 to v4"""
  221. return [upgrade_output(op) for op in outputs]
  222. def downgrade_outputs(outputs):
  223. """downgrade outputs of a code cell to v3 from v4"""
  224. return [downgrade_output(op) for op in outputs]
  225. def downgrade(nb):
  226. """Convert a v4 notebook to v3.
  227. Parameters
  228. ----------
  229. nb : NotebookNode
  230. The Python representation of the notebook to convert.
  231. """
  232. if nb.nbformat != nbformat:
  233. return nb
  234. # Validate the notebook before conversion
  235. _warn_if_invalid(nb, nbformat)
  236. nb.nbformat = v3.nbformat
  237. nb.nbformat_minor = v3.nbformat_minor
  238. cells = [downgrade_cell(cell) for cell in nb.pop("cells")]
  239. nb.worksheets = [v3.new_worksheet(cells=cells)]
  240. nb.metadata.setdefault("name", "")
  241. # Validate the converted notebook before returning it
  242. _warn_if_invalid(nb, v3.nbformat)
  243. nb.orig_nbformat = nb.metadata.pop("orig_nbformat", nbformat)
  244. nb.orig_nbformat_minor = nb.metadata.pop("orig_nbformat_minor", nbformat_minor)
  245. return nb