# speedup.py
import re
import string
from typing import TYPE_CHECKING, Match

if TYPE_CHECKING:
    from ..block_parser import BlockParser
    from ..core import BlockState, InlineState
    from ..inline_parser import InlineParser
    from ..markdown import Markdown

  9. # because mismatch is too slow, add parsers for paragraph and text
  10. HARD_LINEBREAK_RE = re.compile(r" *\n\s*")
  11. PARAGRAPH = (
  12. # start with none punctuation, not number, not whitespace
  13. r"(?:^[^\s\d" + re.escape(string.punctuation) + r"][^\n]*\n)+"
  14. )
  15. __all__ = ["speedup"]
  16. def parse_text(inline: "InlineParser", m: Match[str], state: "InlineState") -> int:
  17. text = m.group(0)
  18. text = HARD_LINEBREAK_RE.sub("\n", text)
  19. inline.process_text(text, state)
  20. return m.end()
  21. def parse_paragraph(block: "BlockParser", m: Match[str], state: "BlockState") -> int:
  22. text = m.group(0)
  23. state.add_paragraph(text)
  24. return m.end()
  25. def speedup(md: "Markdown") -> None:
  26. """Increase the speed of parsing paragraph and inline text."""
  27. md.block.register("paragraph", PARAGRAPH, parse_paragraph)
  28. punc = r"\\><!\[_*`~\^\$="
  29. text_pattern = r"[\s\S]+?(?=[" + punc + r"]|"
  30. if "url_link" in md.inline.rules:
  31. text_pattern += "https?:|"
  32. if md.inline.hard_wrap:
  33. text_pattern += r" *\n|"
  34. else:
  35. text_pattern += r" {2,}\n|"
  36. text_pattern += r"$)"
  37. md.inline.register("text", text_pattern, parse_text)