nbconvertapp.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. #!/usr/bin/env python
  2. """NbConvert is a utility for conversion of .ipynb files.
  3. Command-line interface for the NbConvert conversion utility.
  4. """
  5. # Copyright (c) IPython Development Team.
  6. # Distributed under the terms of the Modified BSD License.
  7. from __future__ import annotations
  8. import asyncio
  9. import glob
  10. import logging
  11. import os
  12. import sys
  13. import typing as t
  14. from textwrap import dedent, fill
  15. from jupyter_core.application import JupyterApp, base_aliases, base_flags
  16. from traitlets import Bool, DottedObjectName, Instance, List, Type, Unicode, default, observe
  17. from traitlets.config import Configurable, catch_config_error
  18. from traitlets.utils.importstring import import_item
  19. from nbconvert import __version__, exporters, postprocessors, preprocessors, writers
  20. from nbconvert.utils.text import indent
  21. from .exporters.base import get_export_names, get_exporter
  22. from .utils.base import NbConvertBase
  23. from .utils.exceptions import ConversionException
  24. from .utils.io import unicode_stdin_stream
  25. # -----------------------------------------------------------------------------
  26. # Classes and functions
  27. # -----------------------------------------------------------------------------
  28. class DottedOrNone(DottedObjectName):
  29. """A string holding a valid dotted object name in Python, such as A.b3._c
  30. Also allows for None type.
  31. """
  32. default_value = ""
  33. def validate(self, obj, value):
  34. """Validate an input."""
  35. if value is not None and len(value) > 0:
  36. return super().validate(obj, value)
  37. return value
  38. nbconvert_aliases = {}
  39. nbconvert_aliases.update(base_aliases)
  40. nbconvert_aliases.update(
  41. {
  42. "to": "NbConvertApp.export_format",
  43. "template": "TemplateExporter.template_name",
  44. "template-file": "TemplateExporter.template_file",
  45. "theme": "HTMLExporter.theme",
  46. "sanitize_html": "HTMLExporter.sanitize_html",
  47. "writer": "NbConvertApp.writer_class",
  48. "post": "NbConvertApp.postprocessor_class",
  49. "output": "NbConvertApp.output_base",
  50. "output-dir": "FilesWriter.build_directory",
  51. "reveal-prefix": "SlidesExporter.reveal_url_prefix",
  52. "nbformat": "NotebookExporter.nbformat_version",
  53. }
  54. )
  55. nbconvert_flags = {}
  56. nbconvert_flags.update(base_flags)
  57. nbconvert_flags.update(
  58. {
  59. "execute": (
  60. {"ExecutePreprocessor": {"enabled": True}},
  61. "Execute the notebook prior to export.",
  62. ),
  63. "allow-errors": (
  64. {"ExecutePreprocessor": {"allow_errors": True}},
  65. (
  66. "Continue notebook execution even if one of the cells throws "
  67. "an error and include the error message in the cell output "
  68. "(the default behaviour is to abort conversion). This flag "
  69. "is only relevant if '--execute' was specified, too."
  70. ),
  71. ),
  72. "stdin": (
  73. {
  74. "NbConvertApp": {
  75. "from_stdin": True,
  76. }
  77. },
  78. "read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'",
  79. ),
  80. "stdout": (
  81. {"NbConvertApp": {"writer_class": "StdoutWriter"}},
  82. "Write notebook output to stdout instead of files.",
  83. ),
  84. "inplace": (
  85. {
  86. "NbConvertApp": {
  87. "use_output_suffix": False,
  88. "export_format": "notebook",
  89. },
  90. "FilesWriter": {"build_directory": ""},
  91. },
  92. """Run nbconvert in place, overwriting the existing notebook (only
  93. relevant when converting to notebook format)""",
  94. ),
  95. "clear-output": (
  96. {
  97. "NbConvertApp": {
  98. "use_output_suffix": False,
  99. "export_format": "notebook",
  100. },
  101. "FilesWriter": {"build_directory": ""},
  102. "ClearOutputPreprocessor": {"enabled": True},
  103. },
  104. """Clear output of current file and save in place,
  105. overwriting the existing notebook. """,
  106. ),
  107. "coalesce-streams": (
  108. {
  109. "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
  110. "FilesWriter": {"build_directory": ""},
  111. "CoalesceStreamsPreprocessor": {"enabled": True},
  112. },
  113. """Coalesce consecutive stdout and stderr outputs into one stream (within each cell).""",
  114. ),
  115. "no-prompt": (
  116. {
  117. "TemplateExporter": {
  118. "exclude_input_prompt": True,
  119. "exclude_output_prompt": True,
  120. }
  121. },
  122. "Exclude input and output prompts from converted document.",
  123. ),
  124. "no-input": (
  125. {
  126. "TemplateExporter": {
  127. "exclude_output_prompt": True,
  128. "exclude_input": True,
  129. "exclude_input_prompt": True,
  130. }
  131. },
  132. """Exclude input cells and output prompts from converted document.
  133. This mode is ideal for generating code-free reports.""",
  134. ),
  135. "allow-chromium-download": (
  136. {
  137. "WebPDFExporter": {
  138. "allow_chromium_download": True,
  139. }
  140. },
  141. """Whether to allow downloading chromium if no suitable version is found on the system.""",
  142. ),
  143. "disable-chromium-sandbox": (
  144. {
  145. "WebPDFExporter": {
  146. "disable_sandbox": True,
  147. }
  148. },
  149. """Disable chromium security sandbox when converting to PDF..""",
  150. ),
  151. "show-input": (
  152. {
  153. "TemplateExporter": {
  154. "exclude_input": False,
  155. }
  156. },
  157. """Shows code input. This flag is only useful for dejavu users.""",
  158. ),
  159. "embed-images": (
  160. {
  161. "HTMLExporter": {
  162. "embed_images": True,
  163. }
  164. },
  165. """Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.""",
  166. ),
  167. "sanitize-html": (
  168. {
  169. "HTMLExporter": {
  170. "sanitize_html": True,
  171. }
  172. },
  173. """Whether the HTML in Markdown cells and cell outputs should be sanitized..""",
  174. ),
  175. }
  176. )
  177. class NbConvertApp(JupyterApp):
  178. """Application used to convert from notebook file type (``*.ipynb``)"""
  179. version = __version__
  180. name = "jupyter-nbconvert"
  181. aliases = nbconvert_aliases
  182. flags = nbconvert_flags
  183. @default("log_level")
  184. def _log_level_default(self):
  185. return logging.INFO
  186. classes: list[type] = List() # type: ignore[assignment]
  187. @default("classes")
  188. def _classes_default(self):
  189. classes: list[type[t.Any]] = [NbConvertBase]
  190. for pkg in (exporters, preprocessors, writers, postprocessors):
  191. for name in dir(pkg):
  192. cls = getattr(pkg, name)
  193. if isinstance(cls, type) and issubclass(cls, Configurable):
  194. classes.append(cls)
  195. return classes
  196. description = Unicode(
  197. """This application is used to convert notebook files (*.ipynb)
  198. to various other formats.
  199. WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES."""
  200. )
  201. output_base = Unicode(
  202. "{notebook_name}",
  203. help="""Overwrite base name use for output files.
  204. Supports pattern replacements '{notebook_name}'.
  205. """,
  206. ).tag(config=True)
  207. use_output_suffix = Bool(
  208. True,
  209. help="""Whether to apply a suffix prior to the extension (only relevant
  210. when converting to notebook format). The suffix is determined by
  211. the exporter, and is usually '.nbconvert'.""",
  212. ).tag(config=True)
  213. output_files_dir = Unicode(
  214. "{notebook_name}_files",
  215. help="""Directory to copy extra files (figures) to.
  216. '{notebook_name}' in the string will be converted to notebook
  217. basename.""",
  218. ).tag(config=True)
  219. examples = Unicode(
  220. f"""
  221. The simplest way to use nbconvert is
  222. > jupyter nbconvert mynotebook.ipynb --to html
  223. Options include {get_export_names()}.
  224. > jupyter nbconvert --to latex mynotebook.ipynb
  225. Both HTML and LaTeX support multiple output templates. LaTeX includes
  226. 'base', 'article' and 'report'. HTML includes 'basic', 'lab' and
  227. 'classic'. You can specify the flavor of the format used.
  228. > jupyter nbconvert --to html --template lab mynotebook.ipynb
  229. You can also pipe the output to stdout, rather than a file
  230. > jupyter nbconvert mynotebook.ipynb --stdout
  231. PDF is generated via latex
  232. > jupyter nbconvert mynotebook.ipynb --to pdf
  233. You can get (and serve) a Reveal.js-powered slideshow
  234. > jupyter nbconvert myslides.ipynb --to slides --post serve
  235. Multiple notebooks can be given at the command line in a couple of
  236. different ways:
  237. > jupyter nbconvert notebook*.ipynb
  238. > jupyter nbconvert notebook1.ipynb notebook2.ipynb
  239. or you can specify the notebooks list in a config file, containing::
  240. c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
  241. > jupyter nbconvert --config mycfg.py
  242. """
  243. )
  244. # Writer specific variables
  245. writer = Instance(
  246. "nbconvert.writers.base.WriterBase",
  247. help="""Instance of the writer class used to write the
  248. results of the conversion.""",
  249. allow_none=True,
  250. )
  251. writer_class = DottedObjectName(
  252. "FilesWriter",
  253. help="""Writer class used to write the
  254. results of the conversion""",
  255. ).tag(config=True)
  256. writer_aliases = {
  257. "fileswriter": "nbconvert.writers.files.FilesWriter",
  258. "debugwriter": "nbconvert.writers.debug.DebugWriter",
  259. "stdoutwriter": "nbconvert.writers.stdout.StdoutWriter",
  260. }
  261. writer_factory = Type(allow_none=True)
  262. @observe("writer_class")
  263. def _writer_class_changed(self, change):
  264. new = change["new"]
  265. if new.lower() in self.writer_aliases:
  266. new = self.writer_aliases[new.lower()]
  267. self.writer_factory = import_item(new)
  268. # Post-processor specific variables
  269. postprocessor = Instance(
  270. "nbconvert.postprocessors.base.PostProcessorBase",
  271. help="""Instance of the PostProcessor class used to write the
  272. results of the conversion.""",
  273. allow_none=True,
  274. )
  275. postprocessor_class = DottedOrNone(
  276. help="""PostProcessor class used to write the
  277. results of the conversion"""
  278. ).tag(config=True)
  279. postprocessor_aliases = {"serve": "nbconvert.postprocessors.serve.ServePostProcessor"}
  280. postprocessor_factory = Type(None, allow_none=True)
  281. @observe("postprocessor_class")
  282. def _postprocessor_class_changed(self, change):
  283. new = change["new"]
  284. if new.lower() in self.postprocessor_aliases:
  285. new = self.postprocessor_aliases[new.lower()]
  286. if new:
  287. self.postprocessor_factory = import_item(new)
  288. export_format = Unicode( # type:ignore[call-overload]
  289. allow_none=False,
  290. help=f"""The export format to be used, either one of the built-in formats
  291. {get_export_names()}
  292. or a dotted object name that represents the import path for an
  293. ``Exporter`` class""",
  294. ).tag(config=True)
  295. notebooks = List(
  296. Unicode(),
  297. help="""List of notebooks to convert.
  298. Wildcards are supported.
  299. Filenames passed positionally will be added to the list.
  300. """,
  301. ).tag(config=True)
  302. from_stdin = Bool(False, help="read a single notebook from stdin.").tag(config=True)
  303. recursive_glob = Bool(
  304. False, help="set the 'recursive' option for glob for searching wildcards."
  305. ).tag(config=True)
  306. @catch_config_error
  307. def initialize(self, argv=None):
  308. """Initialize application, notebooks, writer, and postprocessor"""
  309. # See https://bugs.python.org/issue37373 :(
  310. if sys.platform.startswith("win"):
  311. asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
  312. self.init_syspath()
  313. super().initialize(argv)
  314. if hasattr(self, "load_config_environ"):
  315. self.load_config_environ()
  316. self.init_notebooks()
  317. self.init_writer()
  318. self.init_postprocessor()
  319. def init_syspath(self):
  320. """Add the cwd to the sys.path ($PYTHONPATH)"""
  321. sys.path.insert(0, os.getcwd())
  322. def init_notebooks(self):
  323. """Construct the list of notebooks.
  324. If notebooks are passed on the command-line,
  325. they override (rather than add) notebooks specified in config files.
  326. Glob each notebook to replace notebook patterns with filenames.
  327. """
  328. # Specifying notebooks on the command-line overrides (rather than
  329. # adds) the notebook list
  330. patterns = self.extra_args if self.extra_args else self.notebooks
  331. # Use glob to replace all the notebook patterns with filenames.
  332. filenames = []
  333. for pattern in patterns:
  334. # Use glob to find matching filenames. Allow the user to convert
  335. # notebooks without having to type the extension.
  336. globbed_files = glob.glob(pattern, recursive=self.recursive_glob)
  337. globbed_files.extend(glob.glob(pattern + ".ipynb", recursive=self.recursive_glob))
  338. if not globbed_files:
  339. self.log.warning("pattern %r matched no files", pattern)
  340. for filename in globbed_files:
  341. if filename not in filenames:
  342. filenames.append(filename)
  343. self.notebooks = filenames
  344. def init_writer(self):
  345. """Initialize the writer (which is stateless)"""
  346. self._writer_class_changed({"new": self.writer_class})
  347. if self.writer_factory:
  348. self.writer = self.writer_factory(parent=self)
  349. if hasattr(self.writer, "build_directory") and self.writer.build_directory != "":
  350. self.use_output_suffix = False
  351. def init_postprocessor(self):
  352. """Initialize the postprocessor (which is stateless)"""
  353. self._postprocessor_class_changed({"new": self.postprocessor_class})
  354. if self.postprocessor_factory:
  355. self.postprocessor = self.postprocessor_factory(parent=self)
  356. def start(self):
  357. """Run start after initialization process has completed"""
  358. super().start()
  359. self.convert_notebooks()
  360. def _notebook_filename_to_name(self, notebook_filename):
  361. """
  362. Returns the notebook name from the notebook filename by
  363. applying `output_base` pattern and stripping extension
  364. """
  365. basename = os.path.basename(notebook_filename)
  366. notebook_name = basename[: basename.rfind(".")]
  367. notebook_name = self.output_base.format(notebook_name=notebook_name)
  368. return notebook_name # noqa: RET504
  369. def init_single_notebook_resources(self, notebook_filename):
  370. """Step 1: Initialize resources
  371. This initializes the resources dictionary for a single notebook.
  372. Returns
  373. -------
  374. dict
  375. resources dictionary for a single notebook that MUST include the following keys:
  376. - config_dir: the location of the Jupyter config directory
  377. - unique_key: the notebook name
  378. - output_files_dir: a directory where output files (not
  379. including the notebook itself) should be saved
  380. """
  381. notebook_name = self._notebook_filename_to_name(notebook_filename)
  382. self.log.debug("Notebook name is '%s'", notebook_name)
  383. # first initialize the resources we want to use
  384. resources = {}
  385. resources["config_dir"] = self.config_dir
  386. resources["unique_key"] = notebook_name
  387. output_files_dir = self.output_files_dir.format(notebook_name=notebook_name)
  388. resources["output_files_dir"] = output_files_dir
  389. return resources
  390. def export_single_notebook(self, notebook_filename, resources, input_buffer=None):
  391. """Step 2: Export the notebook
  392. Exports the notebook to a particular format according to the specified
  393. exporter. This function returns the output and (possibly modified)
  394. resources from the exporter.
  395. Parameters
  396. ----------
  397. notebook_filename : str
  398. name of notebook file.
  399. resources : dict
  400. input_buffer :
  401. readable file-like object returning unicode.
  402. if not None, notebook_filename is ignored
  403. Returns
  404. -------
  405. output
  406. dict
  407. resources (possibly modified)
  408. """
  409. try:
  410. if input_buffer is not None:
  411. output, resources = self.exporter.from_file(input_buffer, resources=resources)
  412. else:
  413. output, resources = self.exporter.from_filename(
  414. notebook_filename, resources=resources
  415. )
  416. except ConversionException:
  417. self.log.error("Error while converting '%s'", notebook_filename, exc_info=True) # noqa: G201
  418. self.exit(1)
  419. return output, resources
  420. def write_single_notebook(self, output, resources):
  421. """Step 3: Write the notebook to file
  422. This writes output from the exporter to file using the specified writer.
  423. It returns the results from the writer.
  424. Parameters
  425. ----------
  426. output :
  427. resources : dict
  428. resources for a single notebook including name, config directory
  429. and directory to save output
  430. Returns
  431. -------
  432. file
  433. results from the specified writer output of exporter
  434. """
  435. if "unique_key" not in resources:
  436. msg = "unique_key MUST be specified in the resources, but it is not"
  437. raise KeyError(msg)
  438. notebook_name = resources["unique_key"]
  439. if self.use_output_suffix and self.output_base == "{notebook_name}":
  440. notebook_name += resources.get("output_suffix", "")
  441. if not self.writer:
  442. msg = "No writer object defined!"
  443. raise ValueError(msg)
  444. return self.writer.write(output, resources, notebook_name=notebook_name)
  445. def postprocess_single_notebook(self, write_results):
  446. """Step 4: Post-process the written file
  447. Only used if a postprocessor has been specified. After the
  448. converted notebook is written to a file in Step 3, this post-processes
  449. the notebook.
  450. """
  451. # Post-process if post processor has been defined.
  452. if hasattr(self, "postprocessor") and self.postprocessor:
  453. self.postprocessor(write_results)
  454. def convert_single_notebook(self, notebook_filename, input_buffer=None):
  455. """Convert a single notebook.
  456. Performs the following steps:
  457. 1. Initialize notebook resources
  458. 2. Export the notebook to a particular format
  459. 3. Write the exported notebook to file
  460. 4. (Maybe) postprocess the written file
  461. Parameters
  462. ----------
  463. notebook_filename : str
  464. input_buffer :
  465. If input_buffer is not None, conversion is done and the buffer is
  466. used as source into a file basenamed by the notebook_filename
  467. argument.
  468. """
  469. if input_buffer is None:
  470. self.log.info("Converting notebook %s to %s", notebook_filename, self.export_format)
  471. else:
  472. self.log.info("Converting notebook into %s", self.export_format)
  473. resources = self.init_single_notebook_resources(notebook_filename)
  474. output, resources = self.export_single_notebook(
  475. notebook_filename, resources, input_buffer=input_buffer
  476. )
  477. write_results = self.write_single_notebook(output, resources)
  478. self.postprocess_single_notebook(write_results)
  479. def convert_notebooks(self):
  480. """Convert the notebooks in the self.notebooks traitlet"""
  481. # no notebooks to convert!
  482. if len(self.notebooks) == 0 and not self.from_stdin:
  483. self.print_help()
  484. sys.exit(-1)
  485. if not self.export_format:
  486. msg = (
  487. "Please specify an output format with '--to <format>'."
  488. f"\nThe following formats are available: {get_export_names()}"
  489. )
  490. raise ValueError(msg)
  491. # initialize the exporter
  492. cls = get_exporter(self.export_format)
  493. self.exporter = cls(config=self.config)
  494. # strip duplicate extension from output_base, to avoid Basename.ext.ext
  495. if getattr(self.exporter, "file_extension", False):
  496. base, ext = os.path.splitext(self.output_base)
  497. if ext == self.exporter.file_extension:
  498. self.output_base = base
  499. # convert each notebook
  500. if not self.from_stdin:
  501. for notebook_filename in self.notebooks:
  502. self.convert_single_notebook(notebook_filename)
  503. else:
  504. input_buffer = unicode_stdin_stream()
  505. # default name when conversion from stdin
  506. self.convert_single_notebook("notebook.ipynb", input_buffer=input_buffer)
  507. input_buffer.close()
  508. def document_flag_help(self):
  509. """
  510. Return a string containing descriptions of all the flags.
  511. """
  512. flags = "The following flags are defined:\n\n"
  513. for flag, (cfg, fhelp) in self.flags.items():
  514. flags += f"{flag}\n"
  515. flags += indent(fill(fhelp, 80)) + "\n\n"
  516. flags += indent(fill("Long Form: " + str(cfg), 80)) + "\n\n"
  517. return flags
  518. def document_alias_help(self):
  519. """Return a string containing all of the aliases"""
  520. aliases = "The following aliases are defined:\n\n"
  521. for alias, longname in self.aliases.items():
  522. aliases += f"\t**{alias}** ({longname})\n\n"
  523. return aliases
  524. def document_config_options(self):
  525. """
  526. Provides a much improves version of the configuration documentation by
  527. breaking the configuration options into app, exporter, writer,
  528. preprocessor, postprocessor, and other sections.
  529. """
  530. categories = {
  531. category: [c for c in self._classes_inc_parents() if category in c.__name__.lower()]
  532. for category in ["app", "exporter", "writer", "preprocessor", "postprocessor"]
  533. }
  534. accounted_for = {c for category in categories.values() for c in category}
  535. categories["other"] = [c for c in self._classes_inc_parents() if c not in accounted_for]
  536. header = dedent(
  537. """
  538. {section} Options
  539. -----------------------
  540. """
  541. )
  542. sections = ""
  543. for category, value in categories.items():
  544. sections += header.format(section=category.title())
  545. if category in ["exporter", "preprocessor", "writer"]:
  546. sections += f".. image:: _static/{category}_inheritance.png\n\n"
  547. sections += "\n".join(c.class_config_rst_doc() for c in value)
  548. return sections.replace(" : ", r" \: ")
  549. class DejavuApp(NbConvertApp):
  550. """A deja vu app."""
  551. def initialize(self, argv=None):
  552. """Initialize the app."""
  553. self.config.TemplateExporter.exclude_input = True
  554. self.config.TemplateExporter.exclude_output_prompt = True
  555. self.config.TemplateExporter.exclude_input_prompt = True
  556. self.config.ExecutePreprocessor.enabled = True
  557. self.config.WebPDFExporter.paginate = False
  558. self.config.QtPDFExporter.paginate = False
  559. super().initialize(argv)
  560. if hasattr(self, "load_config_environ"):
  561. self.load_config_environ()
  562. @default("export_format")
  563. def _default_export_format(self):
  564. return "html"
  565. # -----------------------------------------------------------------------------
  566. # Main entry point
  567. # -----------------------------------------------------------------------------
  568. main = launch_new_instance = NbConvertApp.launch_instance
  569. dejavu_main = DejavuApp.launch_instance