# core.py
import re
import sys
from typing import (
    Any,
    Callable,
    ClassVar,
    Dict,
    Generic,
    Iterable,
    List,
    Match,
    MutableMapping,
    Optional,
    Pattern,
    Type,
    TypeVar,
    Union,
    cast,
)

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

  24. _LINE_END = re.compile(r"\n|$")
  25. class BlockState:
  26. """The state to save block parser's cursor and tokens."""
  27. src: str
  28. tokens: List[Dict[str, Any]]
  29. cursor: int
  30. cursor_max: int
  31. list_tight: bool
  32. parent: Any
  33. env: MutableMapping[str, Any]
  34. def __init__(self, parent: Optional[Any] = None) -> None:
  35. self.src = ""
  36. self.tokens = []
  37. # current cursor position
  38. self.cursor = 0
  39. self.cursor_max = 0
  40. # for list and block quote chain
  41. self.list_tight = True
  42. self.parent = parent
  43. # for saving def references
  44. if parent:
  45. self.env = parent.env
  46. else:
  47. self.env = {"ref_links": {}}
  48. def child_state(self, src: str) -> "BlockState":
  49. child = self.__class__(self)
  50. child.process(src)
  51. return child
  52. def process(self, src: str) -> None:
  53. self.src = src
  54. self.cursor_max = len(src)
  55. def find_line_end(self) -> int:
  56. m = _LINE_END.search(self.src, self.cursor)
  57. assert m is not None
  58. return m.end()
  59. def get_text(self, end_pos: int) -> str:
  60. return self.src[self.cursor : end_pos]
  61. def last_token(self) -> Any:
  62. if self.tokens:
  63. return self.tokens[-1]
  64. def prepend_token(self, token: Dict[str, Any]) -> None:
  65. """Insert token before the last token."""
  66. self.tokens.insert(len(self.tokens) - 1, token)
  67. def append_token(self, token: Dict[str, Any]) -> None:
  68. """Add token to the end of token list."""
  69. self.tokens.append(token)
  70. def add_paragraph(self, text: str) -> None:
  71. last_token = self.last_token()
  72. if last_token and last_token["type"] == "paragraph":
  73. last_token["text"] += text
  74. else:
  75. self.tokens.append({"type": "paragraph", "text": text})
  76. def append_paragraph(self) -> Optional[int]:
  77. last_token = self.last_token()
  78. if last_token and last_token["type"] == "paragraph":
  79. pos = self.find_line_end()
  80. last_token["text"] += self.get_text(pos)
  81. return pos
  82. return None
  83. def depth(self) -> int:
  84. d = 0
  85. parent = self.parent
  86. while parent:
  87. d += 1
  88. parent = parent.parent
  89. return d
  90. class InlineState:
  91. """The state to save inline parser's tokens."""
  92. def __init__(self, env: MutableMapping[str, Any]):
  93. self.env = env
  94. self.src = ""
  95. self.tokens: List[Dict[str, Any]] = []
  96. self.in_image = False
  97. self.in_link = False
  98. self.in_emphasis = False
  99. self.in_strong = False
  100. def prepend_token(self, token: Dict[str, Any]) -> None:
  101. """Insert token before the last token."""
  102. self.tokens.insert(len(self.tokens) - 1, token)
  103. def append_token(self, token: Dict[str, Any]) -> None:
  104. """Add token to the end of token list."""
  105. self.tokens.append(token)
  106. def copy(self) -> "InlineState":
  107. """Create a copy of current state."""
  108. state = self.__class__(self.env)
  109. state.in_image = self.in_image
  110. state.in_link = self.in_link
  111. state.in_emphasis = self.in_emphasis
  112. state.in_strong = self.in_strong
  113. return state
  114. ST = TypeVar("ST", InlineState, BlockState)
  115. class Parser(Generic[ST]):
  116. sc_flag: "re._FlagsType" = re.M
  117. state_cls: Type[ST]
  118. SPECIFICATION: ClassVar[Dict[str, str]] = {}
  119. DEFAULT_RULES: ClassVar[Iterable[str]] = []
  120. def __init__(self) -> None:
  121. self.specification = self.SPECIFICATION.copy()
  122. self.rules = list(self.DEFAULT_RULES)
  123. self._methods: Dict[
  124. str,
  125. Callable[[Match[str], ST], Optional[int]],
  126. ] = {}
  127. self.__sc: Dict[str, Pattern[str]] = {}
  128. def compile_sc(self, rules: Optional[List[str]] = None) -> Pattern[str]:
  129. if rules is None:
  130. key = "$"
  131. rules = self.rules
  132. else:
  133. key = "|".join(rules)
  134. sc = self.__sc.get(key)
  135. if sc:
  136. return sc
  137. regex = "|".join(r"(?P<%s>%s)" % (k, self.specification[k]) for k in rules)
  138. sc = re.compile(regex, self.sc_flag)
  139. self.__sc[key] = sc
  140. return sc
  141. def register(
  142. self,
  143. name: str,
  144. pattern: Union[str, None],
  145. func: Callable[[Self, Match[str], ST], Optional[int]],
  146. before: Optional[str] = None,
  147. ) -> None:
  148. """Register a new rule to parse the token. This method is usually used to
  149. create a new plugin.
  150. :param name: name of the new grammar
  151. :param pattern: regex pattern in string
  152. :param func: the parsing function
  153. :param before: insert this rule before a built-in rule
  154. """
  155. self._methods[name] = lambda m, state: func(self, m, state)
  156. if pattern:
  157. self.specification[name] = pattern
  158. if name not in self.rules:
  159. self.insert_rule(self.rules, name, before=before)
  160. def register_rule(self, name: str, pattern: str, func: Any) -> None:
  161. raise DeprecationWarning("This plugin is not compatible with mistune v3.")
  162. @staticmethod
  163. def insert_rule(rules: List[str], name: str, before: Optional[str] = None) -> None:
  164. if before:
  165. try:
  166. index = rules.index(before)
  167. rules.insert(index, name)
  168. except ValueError:
  169. rules.append(name)
  170. else:
  171. rules.append(name)
  172. def parse_method(self, m: Match[str], state: ST) -> Optional[int]:
  173. lastgroup = m.lastgroup
  174. assert lastgroup
  175. func = self._methods[lastgroup]
  176. return func(m, state)
  177. class BaseRenderer(object):
  178. NAME: ClassVar[str] = "base"
  179. def __init__(self) -> None:
  180. self.__methods: Dict[str, Callable[..., str]] = {}
  181. def register(self, name: str, method: Callable[..., str]) -> None:
  182. """Register a render method for the named token. For example::
  183. def render_wiki(renderer, key, title):
  184. return f'<a href="/wiki/{key}">{title}</a>'
  185. renderer.register('wiki', render_wiki)
  186. """
  187. # bind self into renderer method
  188. self.__methods[name] = lambda *arg, **kwargs: method(self, *arg, **kwargs)
  189. def _get_method(self, name: str) -> Callable[..., str]:
  190. try:
  191. return cast(Callable[..., str], object.__getattribute__(self, name))
  192. except AttributeError:
  193. method = self.__methods.get(name)
  194. if not method:
  195. raise AttributeError('No renderer "{!r}"'.format(name))
  196. return method
  197. def render_token(self, token: Dict[str, Any], state: BlockState) -> str:
  198. func = self._get_method(token["type"])
  199. return func(token, state)
  200. def iter_tokens(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> Iterable[str]:
  201. for tok in tokens:
  202. yield self.render_token(tok, state)
  203. def render_tokens(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> str:
  204. return "".join(self.iter_tokens(tokens, state))
  205. def __call__(self, tokens: Iterable[Dict[str, Any]], state: BlockState) -> str:
  206. return self.render_tokens(tokens, state)