html.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. """HTML Exporter class"""
  2. # Copyright (c) Jupyter Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. import base64
  5. import json
  6. import mimetypes
  7. import os
  8. from pathlib import Path
  9. from typing import Any, Optional
  10. import jinja2
  11. import markupsafe
  12. from bs4 import BeautifulSoup # type: ignore[import-not-found]
  13. from jupyter_core.paths import jupyter_path
  14. from traitlets import Bool, Dict, Unicode, default, validate
  15. from traitlets.config import Config
  16. if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0):
  17. from jinja2 import contextfilter # type:ignore[attr-defined]
  18. else:
  19. from jinja2 import pass_context as contextfilter
  20. from jinja2.loaders import split_template_path
  21. from nbformat import NotebookNode
  22. from nbconvert.filters.highlight import Highlight2HTML
  23. from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
  24. from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter
  25. from nbconvert.utils.iso639_1 import iso639_1
  26. from .templateexporter import TemplateExporter
  27. def find_lab_theme(theme_name):
  28. """
  29. Find a JupyterLab theme location by name.
  30. Parameters
  31. ----------
  32. theme_name : str
  33. The name of the labextension theme you want to find.
  34. Raises
  35. ------
  36. ValueError
  37. If the theme was not found, or if it was not specific enough.
  38. Returns
  39. -------
  40. theme_name: str
  41. Full theme name (with scope, if any)
  42. labextension_path : Path
  43. The path to the found labextension on the system.
  44. """
  45. paths = jupyter_path("labextensions")
  46. matching_themes = []
  47. theme_path = None
  48. for path in paths:
  49. for dirpath, dirnames, filenames in os.walk(path):
  50. # If it's a federated labextension that contains themes
  51. if "package.json" in filenames and "themes" in dirnames:
  52. # TODO Find the theme name in the JS code instead?
  53. # TODO Find if it's a light or dark theme?
  54. with open(Path(dirpath) / "package.json", encoding="utf-8") as fobj:
  55. labext_name = json.loads(fobj.read())["name"]
  56. if labext_name == theme_name or theme_name in labext_name.split("/"):
  57. matching_themes.append(labext_name)
  58. full_theme_name = labext_name
  59. theme_path = Path(dirpath) / "themes" / labext_name
  60. if len(matching_themes) == 0:
  61. msg = f'Could not find lab theme "{theme_name}"'
  62. raise ValueError(msg)
  63. if len(matching_themes) > 1:
  64. msg = (
  65. f'Found multiple themes matching "{theme_name}": {matching_themes}. '
  66. "Please be more specific about which theme you want to use."
  67. )
  68. raise ValueError(msg)
  69. return full_theme_name, theme_path
  70. class HTMLExporter(TemplateExporter):
  71. """
  72. Exports a basic HTML document. This exporter assists with the export of
  73. HTML. Inherit from it if you are writing your own HTML template and need
  74. custom preprocessors/filters. If you don't need custom preprocessors/
  75. filters, just change the 'template_file' config option.
  76. """
  77. export_from_notebook = "HTML"
  78. anchor_link_text = Unicode("¶", help="The text used as the text for anchor links.").tag(
  79. config=True
  80. )
  81. exclude_anchor_links = Bool(False, help="If anchor links should be included or not.").tag(
  82. config=True
  83. )
  84. require_js_url = Unicode(
  85. "https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js",
  86. help="""
  87. URL to load require.js from.
  88. Defaults to loading from cdnjs.
  89. """,
  90. ).tag(config=True)
  91. mathjax_url = Unicode(
  92. "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS_CHTML-full,Safe",
  93. help="""
  94. URL to load Mathjax from.
  95. Defaults to loading from cdnjs.
  96. """,
  97. ).tag(config=True)
  98. mermaid_js_url = Unicode(
  99. "https://cdnjs.cloudflare.com/ajax/libs/mermaid/11.10.0/mermaid.esm.min.mjs",
  100. help="""
  101. URL to load MermaidJS from.
  102. Defaults to loading from cdnjs.
  103. """,
  104. )
  105. mermaid_layout_elk_js_url = Unicode(
  106. "https://cdnjs.cloudflare.com/ajax/libs/mermaid-layout-elk/0.1.9/mermaid-layout-elk.esm.min.mjs",
  107. help="""
  108. URL to load MermaidJS ELK layout from.
  109. Defaults to loading from cdnjs.
  110. """,
  111. )
  112. jquery_url = Unicode(
  113. "https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js",
  114. help="""
  115. URL to load jQuery from.
  116. Defaults to loading from cdnjs.
  117. """,
  118. ).tag(config=True)
  119. jupyter_widgets_base_url = Unicode(
  120. "https://unpkg.com/", help="URL base for Jupyter widgets"
  121. ).tag(config=True)
  122. widget_renderer_url = Unicode("", help="Full URL for Jupyter widgets").tag(config=True)
  123. html_manager_semver_range = Unicode(
  124. "*", help="Semver range for Jupyter widgets HTML manager"
  125. ).tag(config=True)
  126. @default("file_extension")
  127. def _file_extension_default(self):
  128. return ".html"
  129. @default("template_name")
  130. def _template_name_default(self):
  131. return "lab"
  132. theme = Unicode(
  133. "light",
  134. help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
  135. ).tag(config=True)
  136. sanitize_html = Bool(
  137. False,
  138. help=(
  139. "Whether the HTML in Markdown cells and cell outputs should be sanitized."
  140. "This should be set to True by nbviewer or similar tools."
  141. ),
  142. ).tag(config=True)
  143. skip_svg_encoding = Bool(
  144. False,
  145. help=("Whether the svg to image data attribute encoding should occur"),
  146. ).tag(config=True)
  147. embed_images = Bool(
  148. False, help="Whether or not to embed images as base64 in markdown cells."
  149. ).tag(config=True)
  150. output_mimetype = "text/html"
  151. lexer_options = Dict(
  152. {},
  153. help=(
  154. "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
  155. "See https://pygments.org/docs/lexers/#available-lexers for available options."
  156. ),
  157. ).tag(config=True)
  158. @property
  159. def default_config(self):
  160. c = Config(
  161. {
  162. "NbConvertBase": {
  163. "display_data_priority": [
  164. "application/vnd.jupyter.widget-view+json",
  165. "application/javascript",
  166. "text/html",
  167. "text/markdown",
  168. "image/svg+xml",
  169. "text/vnd.mermaid",
  170. "text/latex",
  171. "image/png",
  172. "image/jpeg",
  173. "text/plain",
  174. ]
  175. },
  176. "HighlightMagicsPreprocessor": {"enabled": True},
  177. }
  178. )
  179. if super().default_config:
  180. c2 = super().default_config.copy()
  181. c2.merge(c)
  182. c = c2
  183. return c
  184. language_code = Unicode(
  185. "en", help="Language code of the content, should be one of the ISO639-1"
  186. ).tag(config=True)
  187. @validate("language_code")
  188. def _valid_language_code(self, proposal):
  189. if self.language_code not in iso639_1:
  190. self.log.warning(
  191. '"%s" is not an ISO 639-1 language code. '
  192. 'It has been replaced by the default value "en".',
  193. self.language_code,
  194. )
  195. return proposal["trait"].default_value
  196. return proposal["value"]
  197. @contextfilter
  198. def markdown2html(self, context, source):
  199. """Markdown to HTML filter respecting the anchor_link_text setting"""
  200. cell = context.get("cell", {})
  201. attachments = cell.get("attachments", {})
  202. path = context.get("resources", {}).get("metadata", {}).get("path", "")
  203. renderer = IPythonRenderer(
  204. escape=False,
  205. attachments=attachments,
  206. embed_images=self.embed_images,
  207. path=path,
  208. anchor_link_text=self.anchor_link_text,
  209. exclude_anchor_links=self.exclude_anchor_links,
  210. **self.lexer_options,
  211. )
  212. return MarkdownWithMath(renderer=renderer).render(source)
  213. def default_filters(self):
  214. """Get the default filters."""
  215. yield from super().default_filters()
  216. yield ("markdown2html", self.markdown2html)
  217. def from_notebook_node( # type:ignore[override]
  218. self, nb: NotebookNode, resources: Optional[dict[str, Any]] = None, **kw: Any
  219. ) -> tuple[str, dict[str, Any]]:
  220. """Convert from notebook node."""
  221. langinfo = nb.metadata.get("language_info", {})
  222. lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
  223. highlight_code = self.filters.get(
  224. "highlight_code", Highlight2HTML(pygments_lexer=lexer, parent=self)
  225. )
  226. resources = self._init_resources(resources)
  227. filter_data_type = WidgetsDataTypeFilter(
  228. notebook_metadata=self._nb_metadata, parent=self, resources=resources
  229. )
  230. self.register_filter("highlight_code", highlight_code)
  231. self.register_filter("filter_data_type", filter_data_type)
  232. html, resources = super().from_notebook_node(nb, resources, **kw)
  233. soup = BeautifulSoup(html, features="html.parser")
  234. # Add image's alternative text
  235. missing_alt = 0
  236. for elem in soup.select("img:not([alt])"):
  237. elem.attrs["alt"] = "No description has been provided for this image"
  238. missing_alt += 1
  239. if missing_alt:
  240. self.log.warning("Alternative text is missing on %s image(s).", missing_alt)
  241. # Set input and output focusable
  242. for elem in soup.select(".jp-Notebook div.jp-Cell-inputWrapper"):
  243. elem.attrs["tabindex"] = "0"
  244. for elem in soup.select(".jp-Notebook div.jp-OutputArea-output"):
  245. elem.attrs["tabindex"] = "0"
  246. return str(soup), resources
  247. def _init_resources(self, resources):
  248. def resources_include_css(name):
  249. env = self.environment
  250. code = """<style type="text/css">\n%s</style>""" % (env.loader.get_source(env, name)[0])
  251. return markupsafe.Markup(code) # noqa:S704
  252. def resources_include_lab_theme(name):
  253. # Try to find the theme with the given name, looking through the labextensions
  254. _, theme_path = find_lab_theme(name)
  255. with open(theme_path / "index.css") as file:
  256. data = file.read()
  257. # Embed assets (fonts, images...)
  258. for asset in os.listdir(theme_path):
  259. local_url = f"url({Path(asset).as_posix()})"
  260. if local_url in data:
  261. mime_type = mimetypes.guess_type(asset)[0]
  262. # Replace asset url by a base64 dataurl
  263. with open(theme_path / asset, "rb") as assetfile:
  264. base64_data = base64.b64encode(assetfile.read())
  265. base64_str = base64_data.replace(b"\n", b"").decode("ascii")
  266. data = data.replace(local_url, f"url(data:{mime_type};base64,{base64_str})")
  267. code = """<style type="text/css">\n%s</style>""" % data
  268. return markupsafe.Markup(code) # noqa:S704
  269. def resources_include_js(name, module=False):
  270. """Get the resources include JS for a name. If module=True, import as ES module"""
  271. env = self.environment
  272. code = f"""<script {'type="module"' if module else ""}>\n{env.loader.get_source(env, name)[0]}</script>"""
  273. return markupsafe.Markup(code) # noqa:S704
  274. def resources_include_url(name):
  275. """Get the resources include url for a name."""
  276. env = self.environment
  277. mime_type, _encoding = mimetypes.guess_type(name)
  278. try:
  279. # we try to load via the jinja loader, but that tries to load
  280. # as (encoded) text
  281. data = env.loader.get_source(env, name)[0].encode("utf8")
  282. except UnicodeDecodeError:
  283. # if that fails (for instance a binary file, png or ttf)
  284. # we mimic jinja2
  285. pieces = split_template_path(name)
  286. for searchpath in self.template_paths:
  287. filename = os.path.join(searchpath, *pieces)
  288. if os.path.exists(filename):
  289. with open(filename, "rb") as f:
  290. data = f.read()
  291. break
  292. else:
  293. msg = f"No file {name!r} found in {searchpath!r}"
  294. raise ValueError(msg)
  295. data = base64.b64encode(data)
  296. data = data.replace(b"\n", b"").decode("ascii")
  297. src = f"data:{mime_type};base64,{data}"
  298. return markupsafe.Markup(src) # noqa:S704
  299. resources = super()._init_resources(resources)
  300. resources["theme"] = self.theme
  301. resources["include_css"] = resources_include_css
  302. resources["include_lab_theme"] = resources_include_lab_theme
  303. resources["include_js"] = resources_include_js
  304. resources["include_url"] = resources_include_url
  305. resources["require_js_url"] = self.require_js_url
  306. resources["mathjax_url"] = self.mathjax_url
  307. resources["mermaid_js_url"] = self.mermaid_js_url
  308. resources["mermaid_layout_elk_js_url"] = self.mermaid_layout_elk_js_url
  309. resources["jquery_url"] = self.jquery_url
  310. resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
  311. resources["widget_renderer_url"] = self.widget_renderer_url
  312. resources["html_manager_semver_range"] = self.html_manager_semver_range
  313. resources["should_sanitize_html"] = self.sanitize_html
  314. resources["language_code"] = self.language_code
  315. resources["should_not_encode_svg"] = self.skip_svg_encoding
  316. return resources