# state_block.py
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

from ..common.utils import isStrSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType

# MarkdownIt is referenced in annotations; it is imported only while a static
# type checker is running (TYPE_CHECKING is False at runtime).
if TYPE_CHECKING:
    from markdown_it.main import MarkdownIt
  9. class StateBlock(StateBase):
  10. def __init__(
  11. self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
  12. ) -> None:
  13. self.src = src
  14. # link to parser instance
  15. self.md = md
  16. self.env = env
  17. #
  18. # Internal state variables
  19. #
  20. self.tokens = tokens
  21. self.bMarks: list[int] = [] # line begin offsets for fast jumps
  22. self.eMarks: list[int] = [] # line end offsets for fast jumps
  23. # offsets of the first non-space characters (tabs not expanded)
  24. self.tShift: list[int] = []
  25. self.sCount: list[int] = [] # indents for each line (tabs expanded)
  26. # An amount of virtual spaces (tabs expanded) between beginning
  27. # of each line (bMarks) and real beginning of that line.
  28. #
  29. # It exists only as a hack because blockquotes override bMarks
  30. # losing information in the process.
  31. #
  32. # It's used only when expanding tabs, you can think about it as
  33. # an initial tab length, e.g. bsCount=21 applied to string `\t123`
  34. # means first tab should be expanded to 4-21%4 === 3 spaces.
  35. #
  36. self.bsCount: list[int] = []
  37. # block parser variables
  38. self.blkIndent = 0 # required block content indent (for example, if we are
  39. # inside a list, it would be positioned after list marker)
  40. self.line = 0 # line index in src
  41. self.lineMax = 0 # lines count
  42. self.tight = False # loose/tight mode for lists
  43. self.ddIndent = -1 # indent of the current dd block (-1 if there isn't any)
  44. self.listIndent = -1 # indent of the current list block (-1 if there isn't any)
  45. # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
  46. # used in lists to determine if they interrupt a paragraph
  47. self.parentType = "root"
  48. self.level = 0
  49. # renderer
  50. self.result = ""
  51. # Create caches
  52. # Generate markers.
  53. indent_found = False
  54. start = pos = indent = offset = 0
  55. length = len(self.src)
  56. for pos, character in enumerate(self.src):
  57. if not indent_found:
  58. if isStrSpace(character):
  59. indent += 1
  60. if character == "\t":
  61. offset += 4 - offset % 4
  62. else:
  63. offset += 1
  64. continue
  65. else:
  66. indent_found = True
  67. if character == "\n" or pos == length - 1:
  68. if character != "\n":
  69. pos += 1
  70. self.bMarks.append(start)
  71. self.eMarks.append(pos)
  72. self.tShift.append(indent)
  73. self.sCount.append(offset)
  74. self.bsCount.append(0)
  75. indent_found = False
  76. indent = 0
  77. offset = 0
  78. start = pos + 1
  79. # Push fake entry to simplify cache bounds checks
  80. self.bMarks.append(length)
  81. self.eMarks.append(length)
  82. self.tShift.append(0)
  83. self.sCount.append(0)
  84. self.bsCount.append(0)
  85. self.lineMax = len(self.bMarks) - 1 # don't count last fake line
  86. # pre-check if code blocks are enabled, to speed up is_code_block method
  87. self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()
  88. def __repr__(self) -> str:
  89. return (
  90. f"{self.__class__.__name__}"
  91. f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
  92. )
  93. def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
  94. """Push new token to "stream"."""
  95. token = Token(ttype, tag, nesting)
  96. token.block = True
  97. if nesting < 0:
  98. self.level -= 1 # closing tag
  99. token.level = self.level
  100. if nesting > 0:
  101. self.level += 1 # opening tag
  102. self.tokens.append(token)
  103. return token
  104. def isEmpty(self, line: int) -> bool:
  105. """."""
  106. return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]
  107. def skipEmptyLines(self, from_pos: int) -> int:
  108. """."""
  109. while from_pos < self.lineMax:
  110. try:
  111. if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
  112. from_pos
  113. ]:
  114. break
  115. except IndexError:
  116. pass
  117. from_pos += 1
  118. return from_pos
  119. def skipSpaces(self, pos: int) -> int:
  120. """Skip spaces from given position."""
  121. while True:
  122. try:
  123. current = self.src[pos]
  124. except IndexError:
  125. break
  126. if not isStrSpace(current):
  127. break
  128. pos += 1
  129. return pos
  130. def skipSpacesBack(self, pos: int, minimum: int) -> int:
  131. """Skip spaces from given position in reverse."""
  132. if pos <= minimum:
  133. return pos
  134. while pos > minimum:
  135. pos -= 1
  136. if not isStrSpace(self.src[pos]):
  137. return pos + 1
  138. return pos
  139. def skipChars(self, pos: int, code: int) -> int:
  140. """Skip character code from given position."""
  141. while True:
  142. try:
  143. current = self.srcCharCode[pos]
  144. except IndexError:
  145. break
  146. if current != code:
  147. break
  148. pos += 1
  149. return pos
  150. def skipCharsStr(self, pos: int, ch: str) -> int:
  151. """Skip character string from given position."""
  152. while True:
  153. try:
  154. current = self.src[pos]
  155. except IndexError:
  156. break
  157. if current != ch:
  158. break
  159. pos += 1
  160. return pos
  161. def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
  162. """Skip character code reverse from given position - 1."""
  163. if pos <= minimum:
  164. return pos
  165. while pos > minimum:
  166. pos -= 1
  167. if code != self.srcCharCode[pos]:
  168. return pos + 1
  169. return pos
  170. def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
  171. """Skip character string reverse from given position - 1."""
  172. if pos <= minimum:
  173. return pos
  174. while pos > minimum:
  175. pos -= 1
  176. if ch != self.src[pos]:
  177. return pos + 1
  178. return pos
  179. def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
  180. """Cut lines range from source."""
  181. line = begin
  182. if begin >= end:
  183. return ""
  184. queue = [""] * (end - begin)
  185. i = 1
  186. while line < end:
  187. lineIndent = 0
  188. lineStart = first = self.bMarks[line]
  189. last = (
  190. self.eMarks[line] + 1
  191. if line + 1 < end or keepLastLF
  192. else self.eMarks[line]
  193. )
  194. while (first < last) and (lineIndent < indent):
  195. ch = self.src[first]
  196. if isStrSpace(ch):
  197. if ch == "\t":
  198. lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
  199. else:
  200. lineIndent += 1
  201. elif first - lineStart < self.tShift[line]:
  202. lineIndent += 1
  203. else:
  204. break
  205. first += 1
  206. if lineIndent > indent:
  207. # partially expanding tabs in code blocks, e.g '\t\tfoobar'
  208. # with indent=2 becomes ' \tfoobar'
  209. queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
  210. else:
  211. queue[i - 1] = self.src[first:last]
  212. line += 1
  213. i += 1
  214. return "".join(queue)
  215. def is_code_block(self, line: int) -> bool:
  216. """Check if line is a code block,
  217. i.e. the code block rule is enabled and text is indented by more than 3 spaces.
  218. """
  219. return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4