exporter.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. """This module defines a base Exporter class. For Jinja template-based export,
  2. see templateexporter.py.
  3. """
  4. # Copyright (c) Jupyter Development Team.
  5. # Distributed under the terms of the Modified BSD License.
  6. from __future__ import annotations
  7. import collections
  8. import copy
  9. import datetime
  10. import os
  11. import sys
  12. import typing as t
  13. import nbformat
  14. from nbformat import NotebookNode, validator
  15. from traitlets import Bool, HasTraits, List, TraitError, Unicode
  16. from traitlets.config import Config
  17. from traitlets.config.configurable import LoggingConfigurable
  18. from traitlets.utils.importstring import import_item
  19. class ResourcesDict(collections.defaultdict): # type:ignore[type-arg]
  20. """A default dict for resources."""
  21. def __missing__(self, key):
  22. """Handle missing value."""
  23. return ""
  24. class FilenameExtension(Unicode): # type:ignore[type-arg]
  25. """A trait for filename extensions."""
  26. default_value = ""
  27. info_text = "a filename extension, beginning with a dot"
  28. def validate(self, obj, value):
  29. """Validate the file name."""
  30. # cast to proper unicode
  31. value = super().validate(obj, value)
  32. # check that it starts with a dot
  33. if value and not value.startswith("."):
  34. msg = "FileExtension trait '{}' does not begin with a dot: {!r}"
  35. raise TraitError(msg.format(self.name, value))
  36. return value
  37. class Exporter(LoggingConfigurable):
  38. """
  39. Class containing methods that sequentially run a list of preprocessors on a
  40. NotebookNode object and then return the modified NotebookNode object and
  41. accompanying resources dict.
  42. """
  43. enabled = Bool(True, help="Disable this exporter (and any exporters inherited from it).").tag(
  44. config=True
  45. )
  46. file_extension = FilenameExtension(
  47. help="Extension of the file that should be written to disk"
  48. ).tag(config=True)
  49. optimistic_validation = Bool(
  50. False,
  51. help="Reduces the number of validation steps so that it only occurs after all preprocesors have run.",
  52. ).tag(config=True)
  53. # MIME type of the result file, for HTTP response headers.
  54. # This is *not* a traitlet, because we want to be able to access it from
  55. # the class, not just on instances.
  56. output_mimetype = ""
  57. # Should this converter be accessible from the notebook front-end?
  58. # If so, should be a friendly name to display (and possibly translated).
  59. export_from_notebook: str = None # type:ignore[assignment]
  60. # Configurability, allows the user to easily add filters and preprocessors.
  61. preprocessors: List[t.Any] = List(
  62. help="""List of preprocessors, by name or namespace, to enable."""
  63. ).tag(config=True)
  64. _preprocessors: List[t.Any] = List()
  65. default_preprocessors: List[t.Any] = List(
  66. [
  67. "nbconvert.preprocessors.TagRemovePreprocessor",
  68. "nbconvert.preprocessors.RegexRemovePreprocessor",
  69. "nbconvert.preprocessors.ClearOutputPreprocessor",
  70. "nbconvert.preprocessors.CoalesceStreamsPreprocessor",
  71. "nbconvert.preprocessors.ExecutePreprocessor",
  72. "nbconvert.preprocessors.SVG2PDFPreprocessor",
  73. "nbconvert.preprocessors.LatexPreprocessor",
  74. "nbconvert.preprocessors.HighlightMagicsPreprocessor",
  75. "nbconvert.preprocessors.ExtractOutputPreprocessor",
  76. "nbconvert.preprocessors.ExtractAttachmentsPreprocessor",
  77. "nbconvert.preprocessors.ClearMetadataPreprocessor",
  78. ],
  79. help="""List of preprocessors available by default, by name, namespace,
  80. instance, or type.""",
  81. ).tag(config=True)
  82. def __init__(self, config=None, **kw):
  83. """
  84. Public constructor
  85. Parameters
  86. ----------
  87. config : ``traitlets.config.Config``
  88. User configuration instance.
  89. `**kw`
  90. Additional keyword arguments passed to parent __init__
  91. """
  92. with_default_config = self.default_config
  93. if config:
  94. with_default_config.merge(config)
  95. super().__init__(config=with_default_config, **kw)
  96. self._init_preprocessors()
  97. self._nb_metadata = {}
  98. @property
  99. def default_config(self):
  100. return Config()
  101. def from_notebook_node(
  102. self, nb: NotebookNode, resources: t.Any | None = None, **kw: t.Any
  103. ) -> tuple[NotebookNode, dict[str, t.Any]]:
  104. """
  105. Convert a notebook from a notebook node instance.
  106. Parameters
  107. ----------
  108. nb : :class:`~nbformat.NotebookNode`
  109. Notebook node (dict-like with attr-access)
  110. resources : dict
  111. Additional resources that can be accessed read/write by
  112. preprocessors and filters.
  113. `**kw`
  114. Ignored
  115. """
  116. nb_copy = copy.deepcopy(nb)
  117. resources = self._init_resources(resources)
  118. if "language" in nb["metadata"]:
  119. resources["language"] = nb["metadata"]["language"].lower()
  120. # Preprocess
  121. nb_copy, resources = self._preprocess(nb_copy, resources)
  122. notebook_name = ""
  123. if resources is not None:
  124. name = resources.get("metadata", {}).get("name", "")
  125. path = resources.get("metadata", {}).get("path", "")
  126. notebook_name = os.path.join(path, name)
  127. self._nb_metadata[notebook_name] = nb_copy.metadata
  128. return nb_copy, resources
  129. def from_filename(
  130. self, filename: str, resources: dict[str, t.Any] | None = None, **kw: t.Any
  131. ) -> tuple[NotebookNode, dict[str, t.Any]]:
  132. """
  133. Convert a notebook from a notebook file.
  134. Parameters
  135. ----------
  136. filename : str
  137. Full filename of the notebook file to open and convert.
  138. resources : dict
  139. Additional resources that can be accessed read/write by
  140. preprocessors and filters.
  141. `**kw`
  142. Ignored
  143. """
  144. # Pull the metadata from the filesystem.
  145. if resources is None:
  146. resources = ResourcesDict()
  147. if "metadata" not in resources or resources["metadata"] == "":
  148. resources["metadata"] = ResourcesDict()
  149. path, basename = os.path.split(filename)
  150. notebook_name = os.path.splitext(basename)[0]
  151. resources["metadata"]["name"] = notebook_name
  152. resources["metadata"]["path"] = path
  153. modified_date = datetime.datetime.fromtimestamp(
  154. os.path.getmtime(filename), tz=datetime.timezone.utc
  155. )
  156. # datetime.strftime date format for ipython
  157. if sys.platform == "win32":
  158. date_format = "%B %d, %Y"
  159. else:
  160. date_format = "%B %-d, %Y"
  161. resources["metadata"]["modified_date"] = modified_date.strftime(date_format)
  162. with open(filename, encoding="utf-8") as f:
  163. return self.from_file(f, resources=resources, **kw)
  164. def from_file(
  165. self, file_stream: t.Any, resources: dict[str, t.Any] | None = None, **kw: t.Any
  166. ) -> tuple[NotebookNode, dict[str, t.Any]]:
  167. """
  168. Convert a notebook from a notebook file.
  169. Parameters
  170. ----------
  171. file_stream : file-like object
  172. Notebook file-like object to convert.
  173. resources : dict
  174. Additional resources that can be accessed read/write by
  175. preprocessors and filters.
  176. `**kw`
  177. Ignored
  178. """
  179. return self.from_notebook_node(
  180. nbformat.read(file_stream, as_version=4), resources=resources, **kw
  181. )
  182. def register_preprocessor(self, preprocessor, enabled=False):
  183. """
  184. Register a preprocessor.
  185. Preprocessors are classes that act upon the notebook before it is
  186. passed into the Jinja templating engine. Preprocessors are also
  187. capable of passing additional information to the Jinja
  188. templating engine.
  189. Parameters
  190. ----------
  191. preprocessor : `nbconvert.preprocessors.Preprocessor`
  192. A dotted module name, a type, or an instance
  193. enabled : bool
  194. Mark the preprocessor as enabled
  195. """
  196. if preprocessor is None:
  197. msg = "preprocessor must not be None"
  198. raise TypeError(msg)
  199. isclass = isinstance(preprocessor, type)
  200. constructed = not isclass
  201. # Handle preprocessor's registration based on it's type
  202. if constructed and isinstance(
  203. preprocessor,
  204. str,
  205. ):
  206. # Preprocessor is a string, import the namespace and recursively call
  207. # this register_preprocessor method
  208. preprocessor_cls = import_item(preprocessor)
  209. return self.register_preprocessor(preprocessor_cls, enabled)
  210. if constructed and callable(preprocessor):
  211. # Preprocessor is a function, no need to construct it.
  212. # Register and return the preprocessor.
  213. if enabled:
  214. preprocessor.enabled = True
  215. self._preprocessors.append(preprocessor)
  216. return preprocessor
  217. if isclass and issubclass(preprocessor, HasTraits):
  218. # Preprocessor is configurable. Make sure to pass in new default for
  219. # the enabled flag if one was specified.
  220. self.register_preprocessor(preprocessor(parent=self), enabled)
  221. return None
  222. if isclass:
  223. # Preprocessor is not configurable, construct it
  224. self.register_preprocessor(preprocessor(), enabled)
  225. return None
  226. # Preprocessor is an instance of something without a __call__
  227. # attribute.
  228. raise TypeError(
  229. "preprocessor must be callable or an importable constructor, got %r" % preprocessor
  230. )
  231. def _init_preprocessors(self):
  232. """
  233. Register all of the preprocessors needed for this exporter, disabled
  234. unless specified explicitly.
  235. """
  236. self._preprocessors = []
  237. # Load default preprocessors (not necessarily enabled by default).
  238. for preprocessor in self.default_preprocessors:
  239. self.register_preprocessor(preprocessor)
  240. # Load user-specified preprocessors. Enable by default.
  241. for preprocessor in self.preprocessors:
  242. self.register_preprocessor(preprocessor, enabled=True)
  243. def _init_resources(self, resources):
  244. # Make sure the resources dict is of ResourcesDict type.
  245. if resources is None:
  246. resources = ResourcesDict()
  247. if not isinstance(resources, ResourcesDict):
  248. new_resources = ResourcesDict()
  249. new_resources.update(resources)
  250. resources = new_resources
  251. # Make sure the metadata extension exists in resources
  252. if "metadata" in resources:
  253. if not isinstance(resources["metadata"], ResourcesDict):
  254. new_metadata = ResourcesDict()
  255. new_metadata.update(resources["metadata"])
  256. resources["metadata"] = new_metadata
  257. else:
  258. resources["metadata"] = ResourcesDict()
  259. if not resources["metadata"]["name"]:
  260. resources["metadata"]["name"] = "Notebook"
  261. # Set the output extension
  262. resources["output_extension"] = self.file_extension
  263. return resources
  264. def _validate_preprocessor(self, nbc, preprocessor):
  265. try:
  266. nbformat.validate(nbc, relax_add_props=True)
  267. except nbformat.ValidationError:
  268. self.log.error("Notebook is invalid after preprocessor %s", preprocessor)
  269. raise
  270. def _preprocess(self, nb, resources):
  271. """
  272. Preprocess the notebook before passing it into the Jinja engine.
  273. To preprocess the notebook is to successively apply all the
  274. enabled preprocessors. Output from each preprocessor is passed
  275. along to the next one.
  276. Parameters
  277. ----------
  278. nb : notebook node
  279. notebook that is being exported.
  280. resources : a dict of additional resources that
  281. can be accessed read/write by preprocessors
  282. """
  283. # Do a copy.deepcopy first,
  284. # we are never safe enough with what the preprocessors could do.
  285. nbc = copy.deepcopy(nb)
  286. resc = copy.deepcopy(resources)
  287. if hasattr(validator, "normalize"):
  288. _, nbc = validator.normalize(nbc)
  289. # Run each preprocessor on the notebook. Carry the output along
  290. # to each preprocessor
  291. for preprocessor in self._preprocessors:
  292. nbc, resc = preprocessor(nbc, resc)
  293. if not self.optimistic_validation:
  294. self._validate_preprocessor(nbc, preprocessor)
  295. if self.optimistic_validation:
  296. self._validate_preprocessor(nbc, preprocessor)
  297. return nbc, resc