nbpy.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """Read and write notebooks as regular .py files.
  2. Authors:
  3. * Brian Granger
  4. """
  5. # -----------------------------------------------------------------------------
  6. # Copyright (C) 2008-2011 The IPython Development Team
  7. #
  8. # Distributed under the terms of the BSD License. The full license is in
  9. # the file LICENSE, distributed as part of this software.
  10. # -----------------------------------------------------------------------------
  11. # -----------------------------------------------------------------------------
  12. # Imports
  13. # -----------------------------------------------------------------------------
  14. from __future__ import annotations
  15. import re
  16. from .nbbase import (
  17. nbformat,
  18. nbformat_minor,
  19. new_code_cell,
  20. new_heading_cell,
  21. new_notebook,
  22. new_text_cell,
  23. new_worksheet,
  24. )
  25. from .rwbase import NotebookReader, NotebookWriter
  26. # -----------------------------------------------------------------------------
  27. # Code
  28. # -----------------------------------------------------------------------------
  # Matches a comment line that carries a ``coding:`` / ``coding=`` declaration;
  # group 1 captures the declared codec name (letters, digits, "-", "_", ".").
  _encoding_declaration_re = re.compile(
      r"^#.*coding[:=]\s*([-\w.]+)"
  )
  class PyReaderError(Exception):
      """Exception type reserved for failures in the ``.py`` notebook reader."""
  class PyReader(NotebookReader):
      """A python notebook reader.

      Parses the marker-annotated ``.py`` text format, in which comment lines
      such as ``# <codecell>`` or ``# <markdowncell>`` open a new cell, and
      builds a notebook with a single worksheet from it.
      """

      # Attribute-free cell markers and the parser state each one switches to.
      _CELL_MARKERS = (
          ("# <codecell>", "codecell"),
          ("# <htmlcell>", "htmlcell"),
          ("# <markdowncell>", "markdowncell"),
          # VERSIONHACK: plaintext -> raw
          ("# <rawcell>", "rawcell"),
          ("# <plaintextcell>", "rawcell"),
      )

      # Heading markers additionally carry a single-digit level attribute.
      _HEADING_RE = re.compile(r"# <headingcell level=(?P<level>\d)>")

      # Parser state -> text-cell type handed to ``new_text_cell``.
      _TEXT_CELL_TYPES = {
          "htmlcell": "html",
          "markdowncell": "markdown",
          "rawcell": "raw",
      }

      def reads(self, s, **kwargs):
          """Convert a string to a notebook.

          Thin wrapper that forwards ``s`` and ``kwargs`` to `to_notebook`.
          """
          return self.to_notebook(s, **kwargs)

      def to_notebook(self, s, **kwargs):
          """Convert a string to a notebook.

          The text is scanned line by line. Lines before the first marker are
          treated as code. ``# <nbformat>`` lines are skipped everywhere; an
          encoding declaration is skipped only on the first two lines, so that
          ordinary cell content that happens to contain ``coding:`` is kept.

          Parameters
          ----------
          s : str
              Full text of the ``.py`` notebook.
          **kwargs
              Accepted for interface compatibility; not used by the parser.

          Returns
          -------
          The result of ``new_notebook`` holding one worksheet with every
          non-empty cell found, including the final one whatever its type.
          """
          cells = []
          cell_lines: list[str] = []
          cell_kwargs = {}
          state = "codecell"

          for index, line in enumerate(s.splitlines()):
              if line.startswith("# <nbformat>"):
                  continue
              # An encoding declaration is only meaningful on line 1 or 2.
              if index < 2 and _encoding_declaration_re.match(line):
                  continue

              next_state, next_kwargs = self._parse_marker(line)
              if next_state is None:
                  cell_lines.append(line)
                  continue

              # A marker closes the cell collected so far and opens a new one.
              self._flush_cell(cells, state, cell_lines, cell_kwargs)
              state, cell_kwargs, cell_lines = next_state, next_kwargs, []

          # Flush the trailing cell for every state, not just code cells, and
          # pass its kwargs along so a final heading keeps its level.
          self._flush_cell(cells, state, cell_lines, cell_kwargs)

          ws = new_worksheet(cells=cells)
          return new_notebook(worksheets=[ws])

      def _parse_marker(self, line):
          """Return ``(state, kwargs)`` for a marker line, else ``(None, {})``.

          A ``# <headingcell`` line whose level attribute cannot be parsed
          falls back to the ``codecell`` state.
          """
          if line.startswith("# <headingcell"):
              match = self._HEADING_RE.match(line)
              if match is None:
                  return "codecell", {}
              return "headingcell", {"level": int(match.group("level"))}
          for marker, state in self._CELL_MARKERS:
              if line.startswith(marker):
                  return state, {}
          return None, {}

      def _flush_cell(self, cells, state, lines, cell_kwargs):
          """Build a cell from ``lines`` and append it to ``cells`` if non-empty."""
          cell = self.new_cell(state, lines, **cell_kwargs)
          if cell is not None:
              cells.append(cell)

      def new_cell(self, state, lines, **kwargs):
          """Create a new cell.

          Parameters
          ----------
          state : str
              One of ``codecell``, ``htmlcell``, ``markdowncell``, ``rawcell``
              or ``headingcell``.
          lines : list of str
              Raw source lines collected for the cell.
          **kwargs
              ``level`` is read for heading cells (default 1).

          Returns
          -------
          The new cell, or ``None`` when the content is empty after stripping
          surrounding newlines or when ``state`` is not recognised.
          """
          if state == "codecell":
              source = "\n".join(lines).strip("\n")
              return new_code_cell(input=source) if source else None

          if state != "headingcell" and state not in self._TEXT_CELL_TYPES:
              return None

          text = self._remove_comments(lines)
          if not text:
              return None
          if state == "headingcell":
              return new_heading_cell(source=text, level=kwargs.get("level", 1))
          return new_text_cell(self._TEXT_CELL_TYPES[state], source=text)

      def _remove_comments(self, lines):
          """Strip the leading comment prefix from each line and join them.

          ``"# "`` is removed when present; a bare ``"#"`` (no following
          space) loses only the hash, so no text character is swallowed.
          Lines that are not comments are kept unchanged. Leading and
          trailing newlines of the joined text are stripped.
          """
          uncommented = []
          for line in lines:
              if line.startswith("# "):
                  uncommented.append(line[2:])
              elif line.startswith("#"):
                  uncommented.append(line[1:])
              else:
                  uncommented.append(line)
          return "\n".join(uncommented).strip("\n")

      def split_lines_into_blocks(self, lines):
          """Split lines into code blocks.

          Yields one string per top-level statement of the parsed source,
          each spanning from that statement's first line (including any
          decorators) up to the next statement. A single-line input is
          yielded as-is without parsing. Source without any statement is
          yielded as one block when it is non-empty.

          Raises
          ------
          SyntaxError
              Propagated from ``ast.parse`` when the source is not valid
              Python.
          """
          if len(lines) == 1:
              yield lines[0]
              # A plain return ends the generator; raising StopIteration here
              # would surface as RuntimeError (PEP 479).
              return

          import ast

          source = "\n".join(lines)
          body = ast.parse(source).body
          if not body:
              # Only blanks/comments: there is no statement to split on.
              text = source.strip("\n")
              if text:
                  yield text
              return

          starts = [self._first_line_index(node) for node in body]
          ends = starts[1:] + [len(lines)]
          for begin, end in zip(starts, ends):
              yield "\n".join(lines[begin:end]).strip("\n")

      @staticmethod
      def _first_line_index(node):
          """Return the 0-based index of the first source line of ``node``.

          Decorators are taken into account because ``node.lineno`` may point
          at the ``def``/``class`` keyword rather than the first decorator.
          """
          decorators = getattr(node, "decorator_list", ())
          return min([node.lineno] + [d.lineno for d in decorators]) - 1
  class PyWriter(NotebookWriter):
      """A Python notebook writer.

      Emits the marker-annotated ``.py`` text form of a notebook: a coding
      line, an ``<nbformat>`` line, then one marker-introduced section per
      cell.
      """

      def writes(self, nb, **kwargs):
          """Convert a notebook to a string.

          Code cells are written verbatim from their ``input``; html,
          markdown, raw and heading cells are written from their ``source``
          with every line prefixed by ``"# "``. Cells whose content is
          ``None`` and cells of any other type are skipped.
          """
          # Cell types whose body comes from ``source`` and is commented out.
          text_markers = {
              "html": "# <htmlcell>",
              "markdown": "# <markdowncell>",
              "raw": "# <rawcell>",
          }

          out = [
              "# -*- coding: utf-8 -*-",
              "# <nbformat>%i.%i</nbformat>" % (nbformat, nbformat_minor),
              "",
          ]

          for worksheet in nb.worksheets:
              for cell in worksheet.cells:
                  kind = cell.cell_type

                  if kind == "code":
                      body = cell.get("input")
                      if body is None:
                          continue
                      out += ["# <codecell>", ""]
                      out += body.splitlines()
                      out.append("")
                      continue

                  if kind in text_markers:
                      body = cell.get("source")
                      if body is None:
                          continue
                      marker = text_markers[kind]
                  elif kind == "heading":
                      body = cell.get("source")
                      level = cell.get("level", 1)
                      if body is None:
                          continue
                      marker = "# <headingcell level=%s>" % level
                  else:
                      continue

                  out += [marker, ""]
                  out += ["# " + text_line for text_line in body.splitlines()]
                  out.append("")

          # Final empty entry so the joined text ends with a newline.
          out.append("")
          return "\n".join(out)
  # Shared module-level reader/writer instances; their bound methods are
  # re-exported below as the module's function-style API.
  _reader = PyReader()
  _writer = PyWriter()

  reads, read, to_notebook = _reader.reads, _reader.read, _reader.to_notebook
  write, writes = _writer.write, _writer.writes