| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693 |
- """This module defines TemplateExporter, a highly configurable converter
- that uses Jinja2 to export notebook files into different formats.
- """
- # Copyright (c) IPython Development Team.
- # Distributed under the terms of the Modified BSD License.
- from __future__ import annotations
- import html
- import json
- import os
- import typing as t
- import uuid
- import warnings
- from pathlib import Path
- from jinja2 import (
- BaseLoader,
- ChoiceLoader,
- DictLoader,
- Environment,
- FileSystemLoader,
- TemplateNotFound,
- )
- from jupyter_core.paths import jupyter_path
- from nbformat import NotebookNode
- from traitlets import Bool, Dict, HasTraits, List, Unicode, default, observe, validate
- from traitlets.config import Config
- from traitlets.utils.importstring import import_item
- from nbconvert import filters
- from .exporter import Exporter
# Jinja2 extensions that are enabled on every environment built by this module.
JINJA_EXTENSIONS = ["jinja2.ext.loopcontrols"]

# Directory that contains this module.
ROOT = os.path.dirname(__file__)

# True when a ``.git`` entry exists two levels above ROOT; used further down
# to add a package-relative template root ahead of the regular Jupyter paths.
_GIT_MARKER = os.path.join(ROOT, "../../.git")
DEV_MODE = os.path.exists(_GIT_MARKER)
def _escape_html(s):
    """Return ``str(s)`` with HTML special characters, including quotes, escaped."""
    return html.escape(str(s))


def _escape_html_keep_quotes(s):
    """Return ``str(s)`` with HTML special characters escaped, leaving quotes as-is."""
    return html.escape(str(s), quote=False)


def _escape_html_script(s):
    """Return *s* with every ``/`` replaced by ``\\/``."""
    return s.replace("/", "\\/")


# Name -> filter mapping registered on every Jinja environment created by
# TemplateExporter.  Values are plain callables or classes.
default_filters = {
    "indent": filters.indent,
    "markdown2html": filters.markdown2html,
    "markdown2asciidoc": filters.markdown2asciidoc,
    "ansi2html": filters.ansi2html,
    "filter_data_type": filters.DataTypeFilter,
    "get_lines": filters.get_lines,
    "highlight2html": filters.Highlight2HTML,
    "highlight2latex": filters.Highlight2Latex,
    "ipython2python": filters.ipython2python,
    "posix_path": filters.posix_path,
    "markdown2latex": filters.markdown2latex,
    "markdown2rst": filters.markdown2rst,
    "comment_lines": filters.comment_lines,
    "strip_ansi": filters.strip_ansi,
    "strip_dollars": filters.strip_dollars,
    "strip_files_prefix": filters.strip_files_prefix,
    "html2text": filters.html2text,
    "add_anchor": filters.add_anchor,
    "ansi2latex": filters.ansi2latex,
    "wrap_text": filters.wrap_text,
    "escape_latex": filters.escape_latex,
    "citation2latex": filters.citation2latex,
    "path2url": filters.path2url,
    "add_prompts": filters.add_prompts,
    "ascii_only": filters.ascii_only,
    "prevent_list_blocks": filters.prevent_list_blocks,
    "get_metadata": filters.get_metadata,
    "convert_pandoc": filters.convert_pandoc,
    "json_dumps": json.dumps,
    # For removing any HTML
    "escape_html": _escape_html,
    "escape_html_keep_quotes": _escape_html_keep_quotes,
    "escape_html_script": _escape_html_script,
    # For sanitizing HTML for any XSS
    "clean_html": filters.clean_html,
    "strip_trailing_newline": filters.strip_trailing_newline,
    "text_base64": filters.text_base64,
}
# Adapted from
# https://github.com/jupyter/jupyter_server/blob/b62458a7f5ad6b5246d2f142258dedaa409de5d9/jupyter_server/config_manager.py#L19
def recursive_update(target, new):
    """Recursively merge *new* into *target* in place.

    Rules applied to each ``key, value`` pair of *new*:

    * a dict value is merged into ``target[key]``; sub-dicts that end up
      empty are removed from *target*;
    * a ``None`` value deletes ``key`` from *target* (missing keys are ignored);
    * any other value overwrites ``target[key]``.

    Parameters
    ----------
    target : dict
        Dictionary that is updated in place.
    new : dict
        Dictionary holding the overrides.

    Returns
    -------
    dict
        *target* itself, returned for convenience.
    """
    for key, value in new.items():
        if isinstance(value, dict):
            # A missing or non-dict entry cannot be merged into, so start from
            # an empty dict (previously a non-dict entry raised TypeError).
            if key not in target or not isinstance(target[key], dict):
                target[key] = {}
            recursive_update(target[key], value)
            if not target[key]:
                # Prune empty subdicts
                del target[key]
        elif value is None:
            target.pop(key, None)
        else:
            target[key] = value
    return target
# Kept as a module-level function so that referencing it does not cause
# pickle errors.
def deprecated(msg):
    """Issue *msg* as a ``DeprecationWarning`` pointing at the caller's frame."""
    warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
class ExtensionTolerantLoader(BaseLoader):
    """Loader wrapper that retries a failed lookup with an extension appended.

    Parameters
    ----------
    loader : jinja2.BaseLoader
        The Jinja loader that performs the actual lookups.
    extension : str
        Suffix appended to the template name when the plain name cannot be
        found.  It should include the dot, e.g. ``'.tpl'``.
    """

    def __init__(self, loader, extension):
        """Store the wrapped loader and the fallback extension."""
        self.extension = extension
        self.loader = loader

    def get_source(self, environment, template):
        """Return the source for *template*, retrying with the extension added.

        The name is first looked up unchanged.  If that fails and the name does
        not already end with the extension, ``template + extension`` is tried.
        A name that already carries the extension is reported as not found
        under its original name.
        """
        wrapped = self.loader
        try:
            return wrapped.get_source(environment, template)
        except TemplateNotFound:
            if not template.endswith(self.extension):
                return wrapped.get_source(environment, template + self.extension)
            raise TemplateNotFound(template) from None

    def list_templates(self):
        """Return the template listing of the wrapped loader."""
        return self.loader.list_templates()
class TemplateExporter(Exporter):
    """
    Exports notebooks into other file formats. Uses Jinja 2 templating engine
    to output new formats. Inherit from this class if you are creating a new
    template type along with new filters/preprocessors. If the filters/
    preprocessors provided by default suffice, there is no need to inherit from
    this class. Instead, override the template_file and file_extension
    traits via a config file.

    Filters available by default for templates:

    {filters}
    """

    # finish the docstring: substitute the sorted filter names for {filters}.
    # Continuation items carry the same indentation as the docstring body.
    __doc__ = (
        __doc__.format(filters="- " + "\n    - ".join(sorted(default_filters.keys())))
        if __doc__
        else None
    )

    # ------------------------------------------------------------------
    # Lazily built, cached Jinja template and environment
    # ------------------------------------------------------------------

    # Loaded template object, or None when it has to be (re)loaded.
    _template_cached = None

    def _invalidate_template_cache(self, change=None):
        """Drop the cached template so the next access reloads it."""
        self._template_cached = None

    @property
    def template(self):
        """The Jinja template, loaded on first access and then cached."""
        if self._template_cached is None:
            self._template_cached = self._load_template()
        return self._template_cached

    # Jinja environment, or None when it has to be (re)created.
    _environment_cached = None

    def _invalidate_environment_cache(self, change=None):
        """Drop the cached environment and the template that was loaded from it."""
        self._environment_cached = None
        self._invalidate_template_cache()

    @property
    def environment(self):
        """The Jinja environment, created on first access and then cached."""
        if self._environment_cached is None:
            self._environment_cached = self._create_environment()
        return self._environment_cached

    @property
    def default_config(self):
        """Parent default config with the two removal preprocessors enabled."""
        c = Config(
            {
                "RegexRemovePreprocessor": {"enabled": True},
                "TagRemovePreprocessor": {"enabled": True},
            }
        )
        if super().default_config:
            # Start from a copy of the parent's config and merge ours on top.
            c2 = super().default_config.copy()
            c2.merge(c)
            c = c2
        return c

    # ------------------------------------------------------------------
    # Template selection traits
    # ------------------------------------------------------------------

    template_name = Unicode(help="Name of the template to use").tag(
        config=True, affects_template=True
    )

    template_file = Unicode(None, allow_none=True, help="Name of the template file to use").tag(
        config=True, affects_template=True
    )

    raw_template = Unicode("", help="raw template string").tag(affects_environment=True)

    enable_async = Bool(False, help="Enable Jinja async template execution").tag(
        affects_environment=True
    )

    # Last template_file value seen that was not the raw-template key; restored
    # when raw_template is cleared.
    _last_template_file = ""
    # Name under which raw_template is exposed to the Jinja environment.
    _raw_template_key = "<memory>"

    @validate("template_name")
    def _template_name_validate(self, change):
        """Validate ``template_name``, translating 5.x style ``*.tpl`` values.

        A proposed value ending in ``.tpl`` triggers a DeprecationWarning and
        is split: the last path component becomes ``template_file``; if a
        directory part exists, its last component becomes the template name,
        and an absolute remainder replaces ``extra_template_basedirs``.

        Returns the template name to store.
        """
        # Work on the proposed value: depending on how the trait is being set,
        # ``self.template_name`` may still hold the previous value here.
        template_name = change["value"]
        if template_name and template_name.endswith(".tpl"):
            warnings.warn(
                f"5.x style template name passed '{template_name}'. Use --template-name for the template directory with a index.<ext>.j2 file and/or --template-file to denote a different template.",
                DeprecationWarning,
                stacklevel=2,
            )
            directory, self.template_file = os.path.split(template_name)
            if directory:
                directory, template_name = os.path.split(directory)
            if directory and os.path.isabs(directory):
                self.extra_template_basedirs = [directory]
        return template_name

    @observe("template_file")
    def _template_file_changed(self, change):
        """React to a new ``template_file`` value.

        * ``"default"`` is replaced by ``self.default_template``.
        * A value that is a path to an existing file is reduced to its file
          name, and its directory is prepended to ``extra_template_paths``.
        * A resulting name ending in ``.tpl`` emits a DeprecationWarning.
        """
        new = change["new"]
        if new == "default":
            self.template_file = self.default_template  # type:ignore[attr-defined]
            return
        if not new:
            # The trait allows None (and ""): there is no path to inspect.
            return
        # check if template_file is a file path
        # rather than a name already on template_path
        full_path = os.path.abspath(new)
        if os.path.isfile(full_path):
            directory, self.template_file = os.path.split(full_path)
            self.extra_template_paths = [directory, *self.extra_template_paths]

        # While not strictly an invalid template file name, the extension hints that there isn't a template directory involved
        if self.template_file and self.template_file.endswith(".tpl"):
            warnings.warn(
                f"5.x style template file passed '{new}'. Use --template-name for the template directory with a index.<ext>.j2 file and/or --template-file to denote a different template.",
                DeprecationWarning,
                stacklevel=2,
            )

    @default("template_file")
    def _template_file_default(self):
        """Default to ``index<template_extension>``, or None without an extension."""
        if self.template_extension:
            return "index" + self.template_extension
        return None

    @observe("raw_template")
    def _raw_template_changed(self, change):
        """Restore the previous template file when raw_template is cleared."""
        if not change["new"]:
            self.template_file = self._last_template_file
        self._invalidate_template_cache()

    # ------------------------------------------------------------------
    # Template search path traits
    # ------------------------------------------------------------------

    template_paths = List(["."]).tag(config=True, affects_environment=True)
    extra_template_basedirs = List(Unicode()).tag(config=True, affects_environment=True)
    extra_template_paths = List(Unicode()).tag(config=True, affects_environment=True)

    @default("extra_template_basedirs")
    def _default_extra_template_basedirs(self):
        """Default to the current working directory."""
        return [os.getcwd()]

    # Extension that the template files use.
    template_extension = Unicode().tag(config=True, affects_environment=True)

    template_data_paths = List(
        jupyter_path("nbconvert", "templates"), help="Path where templates can be installed too."
    ).tag(affects_environment=True)

    @default("template_extension")
    def _template_extension_default(self):
        """Default to ``file_extension + ".j2"``; a falsy file_extension is returned as-is."""
        if self.file_extension:
            return self.file_extension + ".j2"
        return self.file_extension

    # ------------------------------------------------------------------
    # Content filtering traits (exposed to templates through resources)
    # ------------------------------------------------------------------

    exclude_input = Bool(
        False, help="This allows you to exclude code cell inputs from all templates if set to True."
    ).tag(config=True)

    exclude_input_prompt = Bool(
        False, help="This allows you to exclude input prompts from all templates if set to True."
    ).tag(config=True)

    exclude_output = Bool(
        False,
        help="This allows you to exclude code cell outputs from all templates if set to True.",
    ).tag(config=True)

    exclude_output_prompt = Bool(
        False, help="This allows you to exclude output prompts from all templates if set to True."
    ).tag(config=True)

    exclude_output_stdin = Bool(
        True,
        help="This allows you to exclude output of stdin stream from lab template if set to True.",
    ).tag(config=True)

    exclude_code_cell = Bool(
        False, help="This allows you to exclude code cells from all templates if set to True."
    ).tag(config=True)

    exclude_markdown = Bool(
        False, help="This allows you to exclude markdown cells from all templates if set to True."
    ).tag(config=True)

    exclude_raw = Bool(
        False, help="This allows you to exclude raw cells from all templates if set to True."
    ).tag(config=True)

    exclude_unknown = Bool(
        False, help="This allows you to exclude unknown cells from all templates if set to True."
    ).tag(config=True)

    extra_loaders: List[t.Any] = List(
        help="Jinja loaders to find templates. Will be tried in order "
        "before the default FileSystem ones.",
    ).tag(affects_environment=True)

    filters = Dict(
        help="""Dictionary of filters, by name and namespace, to add to the Jinja
        environment."""
    ).tag(config=True, affects_environment=True)

    raw_mimetypes = List(
        Unicode(), help="""formats of raw cells to be included in this Exporter's output."""
    ).tag(config=True)

    @default("raw_mimetypes")
    def _raw_mimetypes_default(self):
        """Default to the exporter's output mimetype plus the empty string."""
        return [self.output_mimetype, ""]

    # TODO: passing config is wrong, but changing this revealed more complicated issues
    def __init__(self, config=None, **kw):
        """
        Public constructor

        Parameters
        ----------
        config : config
            User configuration instance.
        extra_loaders : list[of Jinja Loaders]
            ordered list of Jinja loader to find templates. Will be tried in order
            before the default FileSystem ones.
        template_file : str (optional, kw arg)
            Template to use when exporting.
        """
        super().__init__(config=config, **kw)

        # Any change to a trait tagged affects_environment / affects_template
        # invalidates the corresponding cache.
        self.observe(
            self._invalidate_environment_cache, list(self.traits(affects_environment=True))
        )
        self.observe(self._invalidate_template_cache, list(self.traits(affects_template=True)))

    def _load_template(self):
        """Load the Jinja template object from the template file

        This is triggered by various trait changes that would change the template.

        Raises
        ------
        ValueError
            If no template file is set once raw_template has been considered.
        """
        # this gives precedence to a raw_template if present
        with self.hold_trait_notifications():
            if self.template_file and (self.template_file != self._raw_template_key):
                # Remember the real file so it can be restored later.
                self._last_template_file = self.template_file
            if self.raw_template:
                self.template_file = self._raw_template_key

        if not self.template_file:
            msg = "No template_file specified!"
            raise ValueError(msg)

        # First try to load the
        # template by name with extension added, then try loading the template
        # as if the name is explicitly specified.
        template_file = self.template_file
        self.log.debug("Attempting to load template %s", template_file)
        self.log.debug(" template_paths: %s", os.pathsep.join(self.template_paths))
        return self.environment.get_template(template_file)

    def from_filename(  # type:ignore[override]
        self, filename: str, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """Convert a notebook from a filename."""
        return super().from_filename(filename, resources, **kw)  # type:ignore[return-value]

    def from_file(  # type:ignore[override]
        self, file_stream: t.Any, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """Convert a notebook from a file."""
        return super().from_file(file_stream, resources, **kw)  # type:ignore[return-value]

    def from_notebook_node(  # type:ignore[override]
        self, nb: NotebookNode, resources: dict[str, t.Any] | None = None, **kw: t.Any
    ) -> tuple[str, dict[str, t.Any]]:
        """
        Convert a notebook from a notebook node instance.

        Parameters
        ----------
        nb : :class:`~nbformat.NotebookNode`
            Notebook node
        resources : dict
            Additional resources that can be accessed read/write by
            preprocessors and filters.

        Returns
        -------
        tuple
            The rendered output (leading ``\\r``/``\\n`` characters stripped)
            and the resources dictionary.
        """
        nb_copy, resources = super().from_notebook_node(nb, resources, **kw)
        resources.setdefault("raw_mimetypes", self.raw_mimetypes)
        resources.setdefault("output_mimetype", self.output_mimetype)
        resources["global_content_filter"] = {
            "include_code": not self.exclude_code_cell,
            "include_markdown": not self.exclude_markdown,
            "include_raw": not self.exclude_raw,
            "include_unknown": not self.exclude_unknown,
            "include_input": not self.exclude_input,
            "include_output": not self.exclude_output,
            "include_output_stdin": not self.exclude_output_stdin,
            "include_input_prompt": not self.exclude_input_prompt,
            "include_output_prompt": not self.exclude_output_prompt,
            "no_prompt": self.exclude_input_prompt and self.exclude_output_prompt,
        }

        # Top level variables are passed to the template_exporter here.
        output = self.template.render(nb=nb_copy, resources=resources)
        output = output.lstrip("\r\n")
        return output, resources

    def _register_filter(self, environ, name, jinja_filter):
        """
        Register a filter on a specific Jinja environment.

        A filter is a function that accepts and acts on one string.
        The filters are accessible within the Jinja templating engine.

        Parameters
        ----------
        environ : jinja2.Environment
            Environment whose ``filters`` mapping receives the filter.
        name : str
            name to give the filter in the Jinja engine
        jinja_filter : str, type or callable
            An import string (resolved with ``import_item``), a class (which is
            instantiated, with ``parent=self`` when it derives from
            ``HasTraits``), or a ready-to-use callable.

        Returns
        -------
        callable
            The callable that was stored in ``environ.filters[name]``.

        Raises
        ------
        TypeError
            If *jinja_filter* is None or an instance that is not callable.
        """
        if jinja_filter is None:
            msg = f"Cannot register filter {name!r}: no filter was given (got None)"
            raise TypeError(msg)

        if isinstance(jinja_filter, str):
            # Import string: resolve it and register whatever it names.
            return self._register_filter(environ, name, import_item(jinja_filter))

        if isinstance(jinja_filter, type):
            if issubclass(jinja_filter, HasTraits):
                # Configurable filter: make this exporter its parent so that it
                # picks up the exporter's configuration.
                filter_instance = jinja_filter(parent=self)
            else:
                # filter is not configurable, construct it
                filter_instance = jinja_filter()
            return self._register_filter(environ, name, filter_instance)

        if callable(jinja_filter):
            # filter is a function (or callable instance), no need to construct it.
            environ.filters[name] = jinja_filter
            return jinja_filter

        # filter is an instance of something without a __call__ attribute.
        msg = (
            f"Cannot register filter {name!r}: expected an import string, a class or "
            f"a callable, got {type(jinja_filter).__name__}"
        )
        raise TypeError(msg)

    def register_filter(self, name, jinja_filter):
        """
        Register a filter on this exporter's Jinja environment.

        A filter is a function that accepts and acts on one string.
        The filters are accessible within the Jinja templating engine.

        Parameters
        ----------
        name : str
            name to give the filter in the Jinja engine
        jinja_filter : str, type or callable
            The filter, in any form accepted by ``_register_filter``.

        Returns
        -------
        callable
            The callable that was registered.
        """
        return self._register_filter(self.environment, name, jinja_filter)

    def default_filters(self):
        """Override in subclasses to provide extra filters.

        This should return an iterable of 2-tuples: (name, class-or-function).
        You should call the method on the parent class and include the filters
        it provides.

        If a name is repeated, the last filter provided wins. Filters from
        user-supplied config win over filters provided by classes.
        """
        # Inside the method body the name resolves to the module-level dict.
        return default_filters.items()

    def _create_environment(self):
        """
        Create the Jinja templating environment.

        Loaders are tried in this order: ``extra_loaders``, the file system
        loader over ``template_paths`` (tolerant of a missing template
        extension), then the in-memory ``raw_template``.
        """
        paths = self.template_paths
        self.log.debug("Template paths:\n\t%s", "\n\t".join(paths))

        loaders = [
            *self.extra_loaders,
            ExtensionTolerantLoader(FileSystemLoader(paths), self.template_extension),
            DictLoader({self._raw_template_key: self.raw_template}),
        ]
        environment = Environment(  # noqa: S701
            loader=ChoiceLoader(loaders),
            extensions=JINJA_EXTENSIONS,
            enable_async=self.enable_async,
        )

        environment.globals["uuid4"] = uuid.uuid4

        # Add default filters to the Jinja2 environment
        for key, value in self.default_filters():
            self._register_filter(environment, key, value)

        # Load user filters.  Overwrite existing filters if need be.
        if self.filters:
            for key, user_filter in self.filters.items():
                self._register_filter(environment, key, user_filter)

        return environment

    def _init_preprocessors(self):
        """Register the parent's preprocessors, then those listed in conf.json."""
        super()._init_preprocessors()
        conf = self._get_conf()
        preprocessors = conf.get("preprocessors", {})
        # preprocessors is a dict for three reasons
        #  * We rely on recursive_update, which can only merge dicts, lists will be overwritten
        #  * We can use the key with numerical prefixing to guarantee ordering (/etc/*.d/XY-file style)
        #  * We can disable preprocessors by overwriting the value with None
        for _, preprocessor in sorted(preprocessors.items(), key=lambda x: x[0]):
            if preprocessor is not None:
                kwargs = preprocessor.copy()
                preprocessor_cls = kwargs.pop("type")
                preprocessor_cls = import_item(preprocessor_cls)
                # Options from the user config override those from conf.json.
                if preprocessor_cls.__name__ in self.config:
                    kwargs.update(self.config[preprocessor_cls.__name__])
                preprocessor = preprocessor_cls(**kwargs)  # noqa: PLW2901
                self.register_preprocessor(preprocessor)

    def _get_conf(self):
        """Merge every ``conf.json`` found directly under ``template_paths``.

        Files are merged in ``template_paths`` order with ``recursive_update``,
        so later files override earlier ones.  Paths whose existence check
        raises PermissionError are skipped.
        """
        conf: dict[str, t.Any] = {}  # the configuration once all conf files are merged
        for path in map(Path, self.template_paths):
            conf_path = path / "conf.json"
            try:
                conf_path_exists = conf_path.exists()
            except PermissionError:
                # for Python <3.14
                pass
            else:
                if conf_path_exists:
                    # Read explicitly as UTF-8 rather than the locale's encoding.
                    with conf_path.open(encoding="utf-8") as f:
                        conf = recursive_update(conf, json.load(f))
        return conf

    @default("template_paths")
    def _template_paths(self, prune=True, root_dirs=None):
        """Compute the ordered list of directories searched for templates.

        Parameters
        ----------
        prune : bool
            When true, template-specific directories and data paths that do
            not exist are left out.
        root_dirs : list of str, optional
            Prefix root directories to use; defaults to
            ``self.get_prefix_root_dirs()``.

        Returns
        -------
        list of str
            Template-specific directories, then the generic root/base/
            compatibility directories, then ``extra_template_paths`` and
            ``template_data_paths``.
        """
        paths = []
        if root_dirs is None:
            root_dirs = self.get_prefix_root_dirs()
        template_names = self.get_template_names()
        for template_name in template_names:
            for base_dir in self.extra_template_basedirs:
                path = os.path.join(base_dir, template_name)
                try:
                    if not prune or os.path.exists(path):
                        paths.append(path)
                except PermissionError:
                    pass
            for root_dir in root_dirs:
                base_dir = os.path.join(root_dir, "nbconvert", "templates")
                path = os.path.join(base_dir, template_name)
                try:
                    if not prune or os.path.exists(path):
                        paths.append(path)
                except PermissionError:
                    pass

        for root_dir in root_dirs:
            # we include root_dir for when we want to be very explicit, e.g.
            # {% extends 'nbconvert/templates/classic/base.html' %}
            paths.append(root_dir)
            # we include base_dir for when we want to be explicit, but less than root_dir, e.g.
            # {% extends 'classic/base.html' %}
            base_dir = os.path.join(root_dir, "nbconvert", "templates")
            paths.append(base_dir)

            compatibility_dir = os.path.join(root_dir, "nbconvert", "templates", "compatibility")
            paths.append(compatibility_dir)

        additional_paths = [
            path for path in self.template_data_paths if not prune or os.path.exists(path)
        ]

        return paths + self.extra_template_paths + additional_paths

    @classmethod
    def get_compatibility_base_template_conf(cls, name):
        """Get the base template config.

        Returns a conf dict for the 5.x template names ``display_priority``
        and ``full``, and None for any other name.
        """
        # Hard-coded base template confs to use for backwards compatibility for 5.x-only templates
        if name == "display_priority":
            return {"base_template": "base"}
        if name == "full":
            return {"base_template": "classic", "mimetypes": {"text/html": True}}
        return None

    def get_template_names(self):
        """Finds a list of template names where each successive template name is the base template

        Starting from ``template_name``, each template's ``conf.json`` is read
        to find its ``base_template`` until a template declares none.

        Returns
        -------
        list of str
            Template names, most specific first.

        Raises
        ------
        ValueError
            If a template in the chain cannot be found, if a 5.x compatibility
            template has no known base configuration, or if the merged
            configuration lists mimetypes that do not include
            ``output_mimetype``.
        """
        template_names = []
        root_dirs = self.get_prefix_root_dirs()
        base_template: str | None = self.template_name
        merged_conf: dict[str, t.Any] = {}  # the configuration once all conf files are merged
        while base_template is not None:
            template_names.append(base_template)
            conf: dict[str, t.Any] = {}
            found_at_least_one = False
            for base_dir in self.extra_template_basedirs:
                template_dir = os.path.join(base_dir, base_template)
                if os.path.exists(template_dir):
                    found_at_least_one = True
                conf_file = os.path.join(template_dir, "conf.json")
                if os.path.exists(conf_file):
                    with open(conf_file, encoding="utf-8") as f:
                        # Values collected so far take precedence over this file.
                        conf = recursive_update(json.load(f), conf)
            for root_dir in root_dirs:
                template_dir = os.path.join(root_dir, "nbconvert", "templates", base_template)
                if os.path.exists(template_dir):
                    found_at_least_one = True
                conf_file = os.path.join(template_dir, "conf.json")
                if os.path.exists(conf_file):
                    with open(conf_file, encoding="utf-8") as f:
                        conf = recursive_update(json.load(f), conf)
            if not found_at_least_one:
                # Check for backwards compatibility template names
                for root_dir in root_dirs:
                    compatibility_file = base_template + ".tpl"
                    compatibility_path = os.path.join(
                        root_dir, "nbconvert", "templates", "compatibility", compatibility_file
                    )
                    if os.path.exists(compatibility_path):
                        found_at_least_one = True
                        warnings.warn(
                            f"5.x template name passed '{self.template_name}'. Use 'lab' or 'classic' for new template usage.",
                            DeprecationWarning,
                            stacklevel=2,
                        )
                        self.template_file = compatibility_file
                        compat_conf = self.get_compatibility_base_template_conf(base_template)
                        if compat_conf is None:
                            # Without a base configuration the chain cannot be
                            # continued; fail with the error type used below.
                            msg = f"No compatibility base template configuration known for 5.x template {base_template!r}"
                            raise ValueError(msg)
                        conf = compat_conf
                        self.template_name = t.cast(str, conf.get("base_template"))
                        break
                if not found_at_least_one:
                    paths = "\n\t".join(root_dirs)
                    msg = f"No template sub-directory with name {base_template!r} found in the following paths:\n\t{paths}"
                    raise ValueError(msg)
            # More specific templates (already merged) win over their bases.
            merged_conf = recursive_update(dict(conf), merged_conf)
            base_template = t.cast(t.Any, conf.get("base_template"))
        conf = merged_conf
        mimetypes = [mimetype for mimetype, enabled in conf.get("mimetypes", {}).items() if enabled]
        if self.output_mimetype and self.output_mimetype not in mimetypes and mimetypes:
            supported_mimetypes = "\n\t".join(mimetypes)
            msg = f"Unsupported mimetype {self.output_mimetype!r} for template {self.template_name!r}, mimetypes supported are: \n\t{supported_mimetypes}"
            raise ValueError(msg)
        return template_names

    def get_prefix_root_dirs(self):
        """Get the prefix root dirs."""
        # We look at the usual jupyter locations, and for development purposes also
        # relative to the package directory (first entry, meaning with highest precedence)
        root_dirs = []
        if DEV_MODE:
            root_dirs.append(os.path.abspath(os.path.join(ROOT, "..", "..", "share", "jupyter")))
        root_dirs.extend(jupyter_path())
        return root_dirs

    def _init_resources(self, resources):
        """Extend the parent's resources with the ``deprecated`` warning helper."""
        resources = super()._init_resources(resources)
        resources["deprecated"] = deprecated
        return resources
|