pandoc.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. """Utility for calling pandoc"""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. import re
  5. import shutil
  6. import subprocess
  7. import warnings
  8. from io import BytesIO, TextIOWrapper
  9. from nbconvert.utils.version import check_version
  10. from .exceptions import ConversionException
# Supported pandoc version range, passed to ``check_version`` by
# ``check_pandoc_version``: at least ``_minimal_version`` but less than
# ``_maximal_version`` (as worded in that function's warning message).
_minimal_version = "2.9.2"
_maximal_version = "4.0.0"
  13. def pandoc(source, fmt, to, extra_args=None, encoding="utf-8"):
  14. """Convert an input string using pandoc.
  15. Pandoc converts an input string `from` a format `to` a target format.
  16. Parameters
  17. ----------
  18. source : string
  19. Input string, assumed to be valid format `from`.
  20. fmt : string
  21. The name of the input format (markdown, etc.)
  22. to : string
  23. The name of the output format (html, etc.)
  24. Returns
  25. -------
  26. out : unicode
  27. Output as returned by pandoc.
  28. Raises
  29. ------
  30. PandocMissing
  31. If pandoc is not installed.
  32. Any error messages generated by pandoc are printed to stderr.
  33. """
  34. cmd = ["pandoc", "-f", fmt, "-t", to]
  35. if extra_args:
  36. cmd.extend(extra_args)
  37. # this will raise an exception that will pop us out of here
  38. check_pandoc_version()
  39. # we can safely continue
  40. p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) # noqa: S603
  41. out, _ = p.communicate(source.encode())
  42. out_str = TextIOWrapper(BytesIO(out), encoding, "replace").read()
  43. return out_str.rstrip("\n")
  44. def get_pandoc_version():
  45. """Gets the Pandoc version if Pandoc is installed.
  46. If the minimal version is not met, it will probe Pandoc for its version, cache it and return that value.
  47. If the minimal version is met, it will return the cached version and stop probing Pandoc
  48. (unless `clean_cache()` is called).
  49. Raises
  50. ------
  51. PandocMissing
  52. If pandoc is unavailable.
  53. """
  54. global __version # noqa: PLW0603
  55. if __version is None:
  56. if not shutil.which("pandoc"):
  57. raise PandocMissing()
  58. out = subprocess.check_output(["pandoc", "-v"]) # noqa: S607
  59. out_lines = out.splitlines()
  60. version_pattern = re.compile(r"^\d+(\.\d+){1,}$")
  61. for tok in out_lines[0].decode("ascii", "replace").split():
  62. if version_pattern.match(tok):
  63. __version = tok # type:ignore[assignment]
  64. break
  65. return __version
  66. def check_pandoc_version():
  67. """Returns True if pandoc's version meets at least minimal version.
  68. Raises
  69. ------
  70. PandocMissing
  71. If pandoc is unavailable.
  72. """
  73. if check_pandoc_version._cached is not None: # type:ignore[attr-defined]
  74. return check_pandoc_version._cached # type:ignore[attr-defined]
  75. v = get_pandoc_version()
  76. if v is None:
  77. warnings.warn(
  78. "Sorry, we cannot determine the version of pandoc.\n"
  79. "Please consider reporting this issue and include the"
  80. "output of pandoc --version.\nContinuing...",
  81. RuntimeWarning,
  82. stacklevel=2,
  83. )
  84. return False
  85. ok = check_version(v, _minimal_version, max_v=_maximal_version)
  86. check_pandoc_version._cached = ok # type:ignore[attr-defined]
  87. if not ok:
  88. warnings.warn(
  89. "You are using an unsupported version of pandoc (%s).\n" % v
  90. + "Your version must be at least (%s) " % _minimal_version
  91. + "but less than (%s).\n" % _maximal_version
  92. + "Refer to https://pandoc.org/installing.html.\nContinuing with doubts...",
  93. RuntimeWarning,
  94. stacklevel=2,
  95. )
  96. return ok
  97. check_pandoc_version._cached = None # type:ignore[attr-defined]
  98. # -----------------------------------------------------------------------------
  99. # Exception handling
  100. # -----------------------------------------------------------------------------
  101. class PandocMissing(ConversionException):
  102. """Exception raised when Pandoc is missing."""
  103. def __init__(self, *args, **kwargs):
  104. """Initialize the exception."""
  105. super().__init__(
  106. "Pandoc wasn't found.\n"
  107. "Please check that pandoc is installed:\n"
  108. "https://pandoc.org/installing.html"
  109. )
  110. # -----------------------------------------------------------------------------
  111. # Internal state management
  112. # -----------------------------------------------------------------------------
  113. def clean_cache():
  114. """Clean the internal cache."""
  115. global __version # noqa: PLW0603
  116. __version = None
  117. __version = None