util.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import re
  2. from html import _replace_charref # type: ignore[attr-defined]
  3. from typing import Match
  4. from urllib.parse import quote
  5. _expand_tab_re = re.compile(r"^( {0,3})\t", flags=re.M)
  6. def expand_leading_tab(text: str, width: int = 4) -> str:
  7. def repl(m: Match[str]) -> str:
  8. s = m.group(1)
  9. return s + " " * (width - len(s))
  10. return _expand_tab_re.sub(repl, text)
  11. def expand_tab(text: str, space: str = " ") -> str:
  12. repl = r"\1" + space
  13. return _expand_tab_re.sub(repl, text)
  14. def escape(s: str, quote: bool = True) -> str:
  15. """Escape characters of ``&<>``. If quote=True, ``"`` will be
  16. converted to ``&quote;``."""
  17. s = s.replace("&", "&amp;")
  18. s = s.replace("<", "&lt;")
  19. s = s.replace(">", "&gt;")
  20. if quote:
  21. s = s.replace('"', "&quot;")
  22. return s
  23. def escape_url(link: str) -> str:
  24. """Escape URL for safety."""
  25. safe = (
  26. ":/?#@" # gen-delims - '[]' (rfc3986)
  27. "!$&()*+,;=" # sub-delims - "'" (rfc3986)
  28. "%" # leave already-encoded octets alone
  29. )
  30. return quote(unescape(link), safe=safe)
  31. def safe_entity(s: str) -> str:
  32. """Escape characters for safety."""
  33. return escape(unescape(s))
  34. def unikey(s: str) -> str:
  35. """Generate a unique key for links and footnotes."""
  36. key = " ".join(s.split()).strip()
  37. return key.lower().upper()
  38. _charref_re = re.compile(
  39. r"&(#[0-9]{1,7};"
  40. r"|#[xX][0-9a-fA-F]+;"
  41. r"|[^\t\n\f <&#;]{1,32};)"
  42. )
  43. def unescape(s: str) -> str:
  44. """
  45. Copy from `html.unescape`, but `_charref` is different. CommonMark
  46. does not accept entity references without a trailing semicolon
  47. """
  48. if "&" not in s:
  49. return s
  50. return _charref_re.sub(_replace_charref, s)
  51. _striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
  52. def striptags(s: str) -> str:
  53. return _striptags_re.sub("", s)
  54. _strip_end_re = re.compile(r"\n\s+$")
  55. def strip_end(src: str) -> str:
  56. return _strip_end_re.sub("\n", src)