html_block.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # HTML block
  2. from __future__ import annotations
  3. import logging
  4. import re
  5. from ..common.html_blocks import block_names
  6. from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
  7. from .state_block import StateBlock
  8. LOGGER = logging.getLogger(__name__)
  9. # An array of opening and corresponding closing sequences for html tags,
  10. # last argument defines whether it can terminate a paragraph or not
  11. HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
  12. (
  13. re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
  14. re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
  15. True,
  16. ),
  17. (re.compile(r"^<!--"), re.compile(r"-->"), True),
  18. (re.compile(r"^<\?"), re.compile(r"\?>"), True),
  19. (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
  20. (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
  21. (
  22. re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
  23. re.compile(r"^$"),
  24. True,
  25. ),
  26. (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
  27. ]
  28. def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
  29. LOGGER.debug(
  30. "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
  31. )
  32. pos = state.bMarks[startLine] + state.tShift[startLine]
  33. maximum = state.eMarks[startLine]
  34. if state.is_code_block(startLine):
  35. return False
  36. if not state.md.options.get("html", None):
  37. return False
  38. if state.src[pos] != "<":
  39. return False
  40. lineText = state.src[pos:maximum]
  41. html_seq = None
  42. for HTML_SEQUENCE in HTML_SEQUENCES:
  43. if HTML_SEQUENCE[0].search(lineText):
  44. html_seq = HTML_SEQUENCE
  45. break
  46. if not html_seq:
  47. return False
  48. if silent:
  49. # true if this sequence can be a terminator, false otherwise
  50. return html_seq[2]
  51. nextLine = startLine + 1
  52. # If we are here - we detected HTML block.
  53. # Let's roll down till block end.
  54. if not html_seq[1].search(lineText):
  55. while nextLine < endLine:
  56. if state.sCount[nextLine] < state.blkIndent:
  57. break
  58. pos = state.bMarks[nextLine] + state.tShift[nextLine]
  59. maximum = state.eMarks[nextLine]
  60. lineText = state.src[pos:maximum]
  61. if html_seq[1].search(lineText):
  62. if len(lineText) != 0:
  63. nextLine += 1
  64. break
  65. nextLine += 1
  66. state.line = nextLine
  67. token = state.push("html_block", "", 0)
  68. token.map = [startLine, nextLine]
  69. token.content = state.getLines(startLine, nextLine, state.blkIndent, True)
  70. return True