abbr.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. import re
  2. import types
  3. from typing import TYPE_CHECKING, Match
  4. from ..helpers import PREVENT_BACKSLASH
  5. from ..util import escape
  6. if TYPE_CHECKING:
  7. from ..block_parser import BlockParser
  8. from ..core import BaseRenderer, BlockState, InlineState
  9. from ..inline_parser import InlineParser
  10. from ..markdown import Markdown
__all__ = ["abbr"]

# https://michelf.ca/projects/php-markdown/extra/#abbr
#
# Block-level pattern for one abbreviation definition, e.g.
# ``*[HTML]: Hyper Text Markup Language``.  Pieces, in order:
#   * up to three leading spaces followed by a literal ``*[``;
#   * ``abbr_key``  - one or more characters other than ``]``;
#   * PREVENT_BACKSLASH (imported from ..helpers) - presumably stops the
#     closing bracket from matching when it is backslash-escaped; confirm
#     against the helper's definition;
#   * a literal ``]:``;
#   * ``abbr_text`` - either a continuation line (optional spaces/tabs, a
#     newline, then an indent of three or more spaces or one tab, then the
#     rest of that line) or, failing that, the remainder of the current
#     line, which may be empty.
REF_ABBR = (
    r"^ {0,3}\*\[(?P<abbr_key>[^\]]+)" + PREVENT_BACKSLASH + r"\]:"
    r"(?P<abbr_text>(?:[ \t]*\n(?: {3,}|\t)[^\n]+)|(?:[^\n]*))$"
)
  17. def parse_ref_abbr(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
  18. ref = state.env.get("ref_abbrs")
  19. if not ref:
  20. ref = {}
  21. key = m.group("abbr_key")
  22. text = m.group("abbr_text")
  23. ref[key] = text.strip()
  24. state.env["ref_abbrs"] = ref
  25. # abbr definition can split paragraph
  26. state.append_token({"type": "blank_line"})
  27. return m.end() + 1
  28. def process_text(inline: "InlineParser", text: str, state: "InlineState") -> None:
  29. ref = state.env.get("ref_abbrs")
  30. if not ref:
  31. return state.append_token({"type": "text", "raw": text})
  32. if state.tokens:
  33. last = state.tokens[-1]
  34. if last["type"] == "text":
  35. state.tokens.pop()
  36. text = last["raw"] + text
  37. abbrs_re = state.env.get("abbrs_re")
  38. if not abbrs_re:
  39. abbrs_re = re.compile(r"|".join(re.escape(k) for k in ref.keys()))
  40. state.env["abbrs_re"] = abbrs_re
  41. pos = 0
  42. while pos < len(text):
  43. m = abbrs_re.search(text, pos)
  44. if not m:
  45. break
  46. end_pos = m.start()
  47. if end_pos > pos:
  48. hole = text[pos:end_pos]
  49. state.append_token({"type": "text", "raw": hole})
  50. label = m.group(0)
  51. state.append_token(
  52. {"type": "abbr", "children": [{"type": "text", "raw": label}], "attrs": {"title": ref[label]}}
  53. )
  54. pos = m.end()
  55. if pos == 0:
  56. # special case, just pure text
  57. state.append_token({"type": "text", "raw": text})
  58. elif pos < len(text):
  59. state.append_token({"type": "text", "raw": text[pos:]})
  60. def render_abbr(renderer: "BaseRenderer", text: str, title: str) -> str:
  61. if not title:
  62. return "<abbr>" + text + "</abbr>"
  63. return '<abbr title="' + escape(title) + '">' + text + "</abbr>"
  64. def abbr(md: "Markdown") -> None:
  65. """A mistune plugin to support abbreviations, spec defined at
  66. https://michelf.ca/projects/php-markdown/extra/#abbr
  67. Here is an example:
  68. .. code-block:: text
  69. The HTML specification
  70. is maintained by the W3C.
  71. *[HTML]: Hyper Text Markup Language
  72. *[W3C]: World Wide Web Consortium
  73. It will be converted into HTML:
  74. .. code-block:: html
  75. The <abbr title="Hyper Text Markup Language">HTML</abbr> specification
  76. is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.
  77. :param md: Markdown instance
  78. """
  79. md.block.register("ref_abbr", REF_ABBR, parse_ref_abbr, before="paragraph")
  80. # replace process_text
  81. md.inline.process_text = types.MethodType(process_text, md.inline) # type: ignore[method-assign]
  82. if md.renderer and md.renderer.NAME == "html":
  83. md.renderer.register("abbr", render_abbr)