state_inline.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. from __future__ import annotations
  2. from collections import namedtuple
  3. from dataclasses import dataclass
  4. from typing import TYPE_CHECKING, Any, Literal
  5. from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
  6. from ..ruler import StateBase
  7. from ..token import Token
  8. from ..utils import EnvType
  9. if TYPE_CHECKING:
  10. from markdown_it import MarkdownIt
  11. @dataclass(slots=True)
  12. class Delimiter:
  13. # Char code of the starting marker (number).
  14. marker: int
  15. # Total length of these series of delimiters.
  16. length: int
  17. # A position of the token this delimiter corresponds to.
  18. token: int
  19. # If this delimiter is matched as a valid opener, `end` will be
  20. # equal to its position, otherwise it's `-1`.
  21. end: int
  22. # Boolean flags that determine if this delimiter could open or close
  23. # an emphasis.
  24. open: bool
  25. close: bool
  26. level: bool | None = None
  # Result record produced by `StateInline.scanDelims`: whether the scanned
  # run may open and/or close an emphasis, and how many markers it contains.
  Scanned = namedtuple("Scanned", "can_open can_close length")
  class StateInline(StateBase):
      """Mutable state carried through one inline-parsing pass over ``src``.

      Holds the scan position, the output token list, plain text that has
      been buffered but not yet emitted, and the delimiter bookkeeping used
      when pairing emphasis-like markers.
      """

      def __init__(
          self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
      ) -> None:
          # Inputs handed over by the caller.
          self.src = src
          self.env = env
          self.md = md

          # Output stream, plus one (initially empty) metadata slot for every
          # token that is already present in it.
          self.tokens = outTokens
          self.tokens_meta: list[dict[str, Any] | None] = [None for _ in outTokens]

          # Scan cursor and the index one past the last character of `src`.
          self.pos = 0
          self.posMax = len(self.src)

          # Current nesting level.
          self.level = 0

          # Text collected so far but not yet emitted as a token, together
          # with the level that token will receive.
          self.pending = ""
          self.pendingLevel = 0

          # { start: end } pairs. Helps pair parsers (emphasis, strikes)
          # when they have to backtrack.
          self.cache: dict[int, int] = {}

          # Emphasis-like delimiters gathered for the current tag.
          self.delimiters: list[Delimiter] = []

          # Delimiter lists belonging to the enclosing tags (a stack).
          self._prev_delimiters: list[list[Delimiter]] = []

          # Backtick run length => last position where it was seen.
          self.backticks: dict[int, int] = {}
          self.backticksScanned = False

          # Counter used to switch off inline linkify-it execution inside
          # <a> and markdown links.
          self.linkLevel = 0

      def __repr__(self) -> str:
          cls_name = self.__class__.__name__
          return (
              f"{cls_name}"
              f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
          )

      def pushPending(self) -> Token:
          """Emit the buffered text as a ``text`` token and reset the buffer.

          The token gets ``pending`` as content and ``pendingLevel`` as level,
          is appended to ``tokens`` and returned. ``tokens_meta`` is not
          touched here.
          """
          flushed = Token("text", "", 0)
          flushed.content, flushed.level = self.pending, self.pendingLevel
          self.pending = ""
          self.tokens.append(flushed)
          return flushed

      def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
          """Append a new token to the output stream and return it.

          Any buffered text is flushed as a text token first. A closing token
          (``nesting < 0``) lowers the level and restores the delimiter list
          of the enclosing tag; an opening token (``nesting > 0``) raises the
          level and starts a fresh delimiter list, which is also recorded in
          the token's metadata entry.
          """
          if self.pending:
              self.pushPending()

          created = Token(ttype, tag, nesting)
          meta = None

          if nesting < 0:
              # Closing tag: step out before the token's level is stamped.
              self.level -= 1
              self.delimiters = self._prev_delimiters.pop()

          created.level = self.level

          if nesting > 0:
              # Opening tag: step in after the token's level is stamped.
              self.level += 1
              self._prev_delimiters.append(self.delimiters)
              self.delimiters = []
              meta = {"delimiters": self.delimiters}

          self.pendingLevel = self.level
          self.tokens.append(created)
          self.tokens_meta.append(meta)
          return created

      def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
          """Measure a run of emphasis-like markers and classify it.

          Determines whether the run beginning at ``start`` is able to open
          an emphasis sequence, close one, or both.

          - start - position to scan from (it should point at a valid marker);
          - canSplitWord - whether such markers may appear inside a word.

          Returns ``Scanned(can_open, can_close, length)``.
          """
          stop = self.posMax
          marker = self.src[start]

          # The beginning of the line counts as whitespace.
          prev_char = " " if start <= 0 else self.src[start - 1]

          # Walk to the first character that is not the marker.
          cursor = start
          while cursor < stop and self.src[cursor] == marker:
              cursor += 1
          run_length = cursor - start

          # The end of the line counts as whitespace as well.
          next_char = " " if cursor >= stop else self.src[cursor]

          prev_is_punct = isMdAsciiPunct(ord(prev_char)) or isPunctChar(prev_char)
          next_is_punct = isMdAsciiPunct(ord(next_char)) or isPunctChar(next_char)

          prev_is_space = isWhiteSpace(ord(prev_char))
          next_is_space = isWhiteSpace(ord(next_char))

          # Left-flanking: not followed by whitespace, and if followed by
          # punctuation then preceded by whitespace or punctuation.
          if next_is_space:
              left_flanking = False
          elif next_is_punct:
              left_flanking = bool(prev_is_space or prev_is_punct)
          else:
              left_flanking = True

          # Right-flanking: the mirror image of the rule above.
          if prev_is_space:
              right_flanking = False
          elif prev_is_punct:
              right_flanking = bool(next_is_space or next_is_punct)
          else:
              right_flanking = True

          can_open = left_flanking and (
              canSplitWord or (not right_flanking) or prev_is_punct
          )
          can_close = right_flanking and (
              canSplitWord or (not left_flanking) or next_is_punct
          )

          return Scanned(can_open, can_close, run_length)