balance_pairs.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. """Balance paired characters (*, _, etc) in inline tokens."""
  2. from __future__ import annotations
  3. from .state_inline import Delimiter, StateInline
  4. def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
  5. """For each opening emphasis-like marker find a matching closing one."""
  6. if not delimiters:
  7. return
  8. openersBottom = {}
  9. maximum = len(delimiters)
  10. # headerIdx is the first delimiter of the current (where closer is) delimiter run
  11. headerIdx = 0
  12. lastTokenIdx = -2 # needs any value lower than -1
  13. jumps: list[int] = []
  14. closerIdx = 0
  15. while closerIdx < maximum:
  16. closer = delimiters[closerIdx]
  17. jumps.append(0)
  18. # markers belong to same delimiter run if:
  19. # - they have adjacent tokens
  20. # - AND markers are the same
  21. #
  22. if (
  23. delimiters[headerIdx].marker != closer.marker
  24. or lastTokenIdx != closer.token - 1
  25. ):
  26. headerIdx = closerIdx
  27. lastTokenIdx = closer.token
  28. # Length is only used for emphasis-specific "rule of 3",
  29. # if it's not defined (in strikethrough or 3rd party plugins),
  30. # we can default it to 0 to disable those checks.
  31. #
  32. closer.length = closer.length or 0
  33. if not closer.close:
  34. closerIdx += 1
  35. continue
  36. # Previously calculated lower bounds (previous fails)
  37. # for each marker, each delimiter length modulo 3,
  38. # and for whether this closer can be an opener;
  39. # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
  40. if closer.marker not in openersBottom:
  41. openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
  42. minOpenerIdx = openersBottom[closer.marker][
  43. (3 if closer.open else 0) + (closer.length % 3)
  44. ]
  45. openerIdx = headerIdx - jumps[headerIdx] - 1
  46. newMinOpenerIdx = openerIdx
  47. while openerIdx > minOpenerIdx:
  48. opener = delimiters[openerIdx]
  49. if opener.marker != closer.marker:
  50. openerIdx -= jumps[openerIdx] + 1
  51. continue
  52. if opener.open and opener.end < 0:
  53. isOddMatch = False
  54. # from spec:
  55. #
  56. # If one of the delimiters can both open and close emphasis, then the
  57. # sum of the lengths of the delimiter runs containing the opening and
  58. # closing delimiters must not be a multiple of 3 unless both lengths
  59. # are multiples of 3.
  60. #
  61. if (
  62. (opener.close or closer.open)
  63. and ((opener.length + closer.length) % 3 == 0)
  64. and (opener.length % 3 != 0 or closer.length % 3 != 0)
  65. ):
  66. isOddMatch = True
  67. if not isOddMatch:
  68. # If previous delimiter cannot be an opener, we can safely skip
  69. # the entire sequence in future checks. This is required to make
  70. # sure algorithm has linear complexity (see *_*_*_*_*_... case).
  71. #
  72. if openerIdx > 0 and not delimiters[openerIdx - 1].open:
  73. lastJump = jumps[openerIdx - 1] + 1
  74. else:
  75. lastJump = 0
  76. jumps[closerIdx] = closerIdx - openerIdx + lastJump
  77. jumps[openerIdx] = lastJump
  78. closer.open = False
  79. opener.end = closerIdx
  80. opener.close = False
  81. newMinOpenerIdx = -1
  82. # treat next token as start of run,
  83. # it optimizes skips in **<...>**a**<...>** pathological case
  84. lastTokenIdx = -2
  85. break
  86. openerIdx -= jumps[openerIdx] + 1
  87. if newMinOpenerIdx != -1:
  88. # If match for this delimiter run failed, we want to set lower bound for
  89. # future lookups. This is required to make sure algorithm has linear
  90. # complexity.
  91. #
  92. # See details here:
  93. # https:#github.com/commonmark/cmark/issues/178#issuecomment-270417442
  94. #
  95. openersBottom[closer.marker][
  96. (3 if closer.open else 0) + ((closer.length or 0) % 3)
  97. ] = newMinOpenerIdx
  98. closerIdx += 1
  99. def link_pairs(state: StateInline) -> None:
  100. tokens_meta = state.tokens_meta
  101. maximum = len(state.tokens_meta)
  102. processDelimiters(state, state.delimiters)
  103. curr = 0
  104. while curr < maximum:
  105. curr_meta = tokens_meta[curr]
  106. if curr_meta and "delimiters" in curr_meta:
  107. processDelimiters(state, curr_meta["delimiters"])
  108. curr += 1