html.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
from typing import Any, ClassVar, Dict, Iterable, Literal, Optional, Tuple, Union

from ..core import BaseRenderer, BlockState
from ..util import escape as escape_text
from ..util import safe_entity, striptags
  5. class HTMLRenderer(BaseRenderer):
  6. """A renderer for converting Markdown to HTML."""
  7. _escape: bool
  8. NAME: ClassVar[Literal["html"]] = "html"
  9. HARMFUL_PROTOCOLS: ClassVar[Tuple[str, ...]] = (
  10. "javascript:",
  11. "vbscript:",
  12. "file:",
  13. "data:",
  14. )
  15. GOOD_DATA_PROTOCOLS: ClassVar[Tuple[str, ...]] = (
  16. "data:image/gif;",
  17. "data:image/png;",
  18. "data:image/jpeg;",
  19. "data:image/webp;",
  20. )
  21. def __init__(self, escape: bool = True, allow_harmful_protocols: Optional[bool] = None) -> None:
  22. super(HTMLRenderer, self).__init__()
  23. self._allow_harmful_protocols = allow_harmful_protocols
  24. self._escape = escape
  25. def render_token(self, token: Dict[str, Any], state: BlockState) -> str:
  26. # backward compitable with v2
  27. func = self._get_method(token["type"])
  28. attrs = token.get("attrs")
  29. if "raw" in token:
  30. text = token["raw"]
  31. elif "children" in token:
  32. text = self.render_tokens(token["children"], state)
  33. else:
  34. if attrs:
  35. return func(**attrs)
  36. else:
  37. return func()
  38. if attrs:
  39. return func(text, **attrs)
  40. else:
  41. return func(text)
  42. def safe_url(self, url: str) -> str:
  43. """Ensure the given URL is safe. This method is used for rendering
  44. links, images, and etc.
  45. """
  46. if self._allow_harmful_protocols is True:
  47. return escape_text(url)
  48. _url = url.lower()
  49. if self._allow_harmful_protocols and _url.startswith(tuple(self._allow_harmful_protocols)):
  50. return escape_text(url)
  51. if _url.startswith(self.HARMFUL_PROTOCOLS) and not _url.startswith(self.GOOD_DATA_PROTOCOLS):
  52. return "#harmful-link"
  53. return escape_text(url)
  54. def text(self, text: str) -> str:
  55. if self._escape:
  56. return escape_text(text)
  57. return safe_entity(text)
  58. def emphasis(self, text: str) -> str:
  59. return "<em>" + text + "</em>"
  60. def strong(self, text: str) -> str:
  61. return "<strong>" + text + "</strong>"
  62. def link(self, text: str, url: str, title: Optional[str] = None) -> str:
  63. s = '<a href="' + self.safe_url(url) + '"'
  64. if title:
  65. s += ' title="' + safe_entity(title) + '"'
  66. return s + ">" + text + "</a>"
  67. def image(self, text: str, url: str, title: Optional[str] = None) -> str:
  68. src = self.safe_url(url)
  69. alt = escape_text(striptags(text))
  70. s = '<img src="' + src + '" alt="' + alt + '"'
  71. if title:
  72. s += ' title="' + safe_entity(title) + '"'
  73. return s + " />"
  74. def codespan(self, text: str) -> str:
  75. return "<code>" + escape_text(text) + "</code>"
  76. def linebreak(self) -> str:
  77. return "<br />\n"
  78. def softbreak(self) -> str:
  79. return "\n"
  80. def inline_html(self, html: str) -> str:
  81. if self._escape:
  82. return escape_text(html)
  83. return html
  84. def paragraph(self, text: str) -> str:
  85. return "<p>" + text + "</p>\n"
  86. def heading(self, text: str, level: int, **attrs: Any) -> str:
  87. tag = "h" + str(level)
  88. html = "<" + tag
  89. _id = attrs.get("id")
  90. if _id:
  91. html += ' id="' + _id + '"'
  92. return html + ">" + text + "</" + tag + ">\n"
  93. def blank_line(self) -> str:
  94. return ""
  95. def thematic_break(self) -> str:
  96. return "<hr />\n"
  97. def block_text(self, text: str) -> str:
  98. return text
  99. def block_code(self, code: str, info: Optional[str] = None) -> str:
  100. html = "<pre><code"
  101. if info is not None:
  102. info = safe_entity(info.strip())
  103. if info:
  104. lang = info.split(None, 1)[0]
  105. html += ' class="language-' + lang + '"'
  106. return html + ">" + escape_text(code) + "</code></pre>\n"
  107. def block_quote(self, text: str) -> str:
  108. return "<blockquote>\n" + text + "</blockquote>\n"
  109. def block_html(self, html: str) -> str:
  110. if self._escape:
  111. return "<p>" + escape_text(html.strip()) + "</p>\n"
  112. return html + "\n"
  113. def block_error(self, text: str) -> str:
  114. return '<div class="error"><pre>' + text + "</pre></div>\n"
  115. def list(self, text: str, ordered: bool, **attrs: Any) -> str:
  116. if ordered:
  117. html = "<ol"
  118. start = attrs.get("start")
  119. if start is not None:
  120. html += ' start="' + str(start) + '"'
  121. return html + ">\n" + text + "</ol>\n"
  122. return "<ul>\n" + text + "</ul>\n"
  123. def list_item(self, text: str) -> str:
  124. return "<li>" + text + "</li>\n"