nbpy.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. """Read and write notebooks as regular .py files.
  2. Authors:
  3. * Brian Granger
  4. """
  5. # -----------------------------------------------------------------------------
  6. # Copyright (C) 2008-2011 The IPython Development Team
  7. #
  8. # Distributed under the terms of the BSD License. The full license is in
  9. # the file LICENSE, distributed as part of this software.
  10. # -----------------------------------------------------------------------------
  11. # -----------------------------------------------------------------------------
  12. # Imports
  13. # -----------------------------------------------------------------------------
  14. from __future__ import annotations
  15. import re
  16. from .nbbase import new_code_cell, new_notebook, new_text_cell, new_worksheet
  17. from .rwbase import NotebookReader, NotebookWriter
  18. # -----------------------------------------------------------------------------
  19. # Code
  20. # -----------------------------------------------------------------------------
# Matches a comment line that carries a source-encoding declaration, e.g.
# ``# -*- coding: utf-8 -*-``; group 1 captures the encoding name.
# PyReader.to_notebook uses it to skip such lines instead of treating them
# as cell content.
_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)")
  22. class PyReaderError(Exception):
  23. """An error raised by the PyReader."""
  24. class PyReader(NotebookReader):
  25. """A Python notebook reader."""
  26. def reads(self, s, **kwargs):
  27. """Convert a string to a notebook."""
  28. return self.to_notebook(s, **kwargs)
  29. def to_notebook(self, s, **kwargs):
  30. """Convert a string to a notebook."""
  31. lines = s.splitlines()
  32. cells = []
  33. cell_lines: list[str] = []
  34. state = "codecell"
  35. for line in lines:
  36. if line.startswith("# <nbformat>") or _encoding_declaration_re.match(line):
  37. pass
  38. elif line.startswith("# <codecell>"):
  39. cell = self.new_cell(state, cell_lines)
  40. if cell is not None:
  41. cells.append(cell)
  42. state = "codecell"
  43. cell_lines = []
  44. elif line.startswith("# <htmlcell>"):
  45. cell = self.new_cell(state, cell_lines)
  46. if cell is not None:
  47. cells.append(cell)
  48. state = "htmlcell"
  49. cell_lines = []
  50. elif line.startswith("# <markdowncell>"):
  51. cell = self.new_cell(state, cell_lines)
  52. if cell is not None:
  53. cells.append(cell)
  54. state = "markdowncell"
  55. cell_lines = []
  56. else:
  57. cell_lines.append(line)
  58. if cell_lines and state == "codecell":
  59. cell = self.new_cell(state, cell_lines)
  60. if cell is not None:
  61. cells.append(cell)
  62. ws = new_worksheet(cells=cells)
  63. return new_notebook(worksheets=[ws])
  64. def new_cell(self, state, lines):
  65. """Create a new cell."""
  66. if state == "codecell":
  67. input_ = "\n".join(lines)
  68. input_ = input_.strip("\n")
  69. if input_:
  70. return new_code_cell(input=input_)
  71. elif state == "htmlcell":
  72. text = self._remove_comments(lines)
  73. if text:
  74. return new_text_cell("html", source=text)
  75. elif state == "markdowncell":
  76. text = self._remove_comments(lines)
  77. if text:
  78. return new_text_cell("markdown", source=text)
  79. def _remove_comments(self, lines):
  80. new_lines = []
  81. for line in lines:
  82. if line.startswith("#"):
  83. new_lines.append(line[2:])
  84. else:
  85. new_lines.append(line)
  86. text = "\n".join(new_lines)
  87. text = text.strip("\n")
  88. return text # noqa: RET504
  89. def split_lines_into_blocks(self, lines):
  90. """Split lines into code blocks."""
  91. if len(lines) == 1:
  92. yield lines[0]
  93. raise StopIteration()
  94. import ast
  95. source = "\n".join(lines)
  96. code = ast.parse(source)
  97. starts = [x.lineno - 1 for x in code.body]
  98. for i in range(len(starts) - 1):
  99. yield "\n".join(lines[starts[i] : starts[i + 1]]).strip("\n")
  100. yield "\n".join(lines[starts[-1] :]).strip("\n")
  101. class PyWriter(NotebookWriter):
  102. """A Python notebook writer."""
  103. def writes(self, nb, **kwargs):
  104. """Convert a notebook object to a string."""
  105. lines = ["# -*- coding: utf-8 -*-"]
  106. lines.extend(["# <nbformat>2</nbformat>", ""])
  107. for ws in nb.worksheets:
  108. for cell in ws.cells:
  109. if cell.cell_type == "code":
  110. input_ = cell.get("input")
  111. if input_ is not None:
  112. lines.extend(["# <codecell>", ""])
  113. lines.extend(input_.splitlines())
  114. lines.append("")
  115. elif cell.cell_type == "html":
  116. input_ = cell.get("source")
  117. if input_ is not None:
  118. lines.extend(["# <htmlcell>", ""])
  119. lines.extend(["# " + line for line in input_.splitlines()])
  120. lines.append("")
  121. elif cell.cell_type == "markdown":
  122. input_ = cell.get("source")
  123. if input_ is not None:
  124. lines.extend(["# <markdowncell>", ""])
  125. lines.extend(["# " + line for line in input_.splitlines()])
  126. lines.append("")
  127. lines.append("")
  128. return str("\n".join(lines))
# Module-level convenience API: one shared reader and one shared writer
# instance, with their bound methods exposed as plain module functions.
_reader = PyReader()
_writer = PyWriter()
reads = _reader.reads
# ``read`` is not defined on PyReader in this file, so it resolves through
# the NotebookReader base class.
read = _reader.read
to_notebook = _reader.to_notebook
# ``write`` is likewise not defined on PyWriter here and comes from the
# NotebookWriter base class.
write = _writer.write
writes = _writer.writes