| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- """Balance paired characters (*, _, etc) in inline tokens."""
- from __future__ import annotations
- from .state_inline import Delimiter, StateInline
- def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
- """For each opening emphasis-like marker find a matching closing one."""
- if not delimiters:
- return
- openersBottom = {}
- maximum = len(delimiters)
- # headerIdx is the first delimiter of the current (where closer is) delimiter run
- headerIdx = 0
- lastTokenIdx = -2 # needs any value lower than -1
- jumps: list[int] = []
- closerIdx = 0
- while closerIdx < maximum:
- closer = delimiters[closerIdx]
- jumps.append(0)
- # markers belong to same delimiter run if:
- # - they have adjacent tokens
- # - AND markers are the same
- #
- if (
- delimiters[headerIdx].marker != closer.marker
- or lastTokenIdx != closer.token - 1
- ):
- headerIdx = closerIdx
- lastTokenIdx = closer.token
- # Length is only used for emphasis-specific "rule of 3",
- # if it's not defined (in strikethrough or 3rd party plugins),
- # we can default it to 0 to disable those checks.
- #
- closer.length = closer.length or 0
- if not closer.close:
- closerIdx += 1
- continue
- # Previously calculated lower bounds (previous fails)
- # for each marker, each delimiter length modulo 3,
- # and for whether this closer can be an opener;
- # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
- if closer.marker not in openersBottom:
- openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
- minOpenerIdx = openersBottom[closer.marker][
- (3 if closer.open else 0) + (closer.length % 3)
- ]
- openerIdx = headerIdx - jumps[headerIdx] - 1
- newMinOpenerIdx = openerIdx
- while openerIdx > minOpenerIdx:
- opener = delimiters[openerIdx]
- if opener.marker != closer.marker:
- openerIdx -= jumps[openerIdx] + 1
- continue
- if opener.open and opener.end < 0:
- isOddMatch = False
- # from spec:
- #
- # If one of the delimiters can both open and close emphasis, then the
- # sum of the lengths of the delimiter runs containing the opening and
- # closing delimiters must not be a multiple of 3 unless both lengths
- # are multiples of 3.
- #
- if (
- (opener.close or closer.open)
- and ((opener.length + closer.length) % 3 == 0)
- and (opener.length % 3 != 0 or closer.length % 3 != 0)
- ):
- isOddMatch = True
- if not isOddMatch:
- # If previous delimiter cannot be an opener, we can safely skip
- # the entire sequence in future checks. This is required to make
- # sure algorithm has linear complexity (see *_*_*_*_*_... case).
- #
- if openerIdx > 0 and not delimiters[openerIdx - 1].open:
- lastJump = jumps[openerIdx - 1] + 1
- else:
- lastJump = 0
- jumps[closerIdx] = closerIdx - openerIdx + lastJump
- jumps[openerIdx] = lastJump
- closer.open = False
- opener.end = closerIdx
- opener.close = False
- newMinOpenerIdx = -1
- # treat next token as start of run,
- # it optimizes skips in **<...>**a**<...>** pathological case
- lastTokenIdx = -2
- break
- openerIdx -= jumps[openerIdx] + 1
- if newMinOpenerIdx != -1:
- # If match for this delimiter run failed, we want to set lower bound for
- # future lookups. This is required to make sure algorithm has linear
- # complexity.
- #
- # See details here:
- # https:#github.com/commonmark/cmark/issues/178#issuecomment-270417442
- #
- openersBottom[closer.marker][
- (3 if closer.open else 0) + ((closer.length or 0) % 3)
- ] = newMinOpenerIdx
- closerIdx += 1
- def link_pairs(state: StateInline) -> None:
- tokens_meta = state.tokens_meta
- maximum = len(state.tokens_meta)
- processDelimiters(state, state.delimiters)
- curr = 0
- while curr < maximum:
- curr_meta = tokens_meta[curr]
- if curr_meta and "delimiters" in curr_meta:
- processDelimiters(state, curr_meta["delimiters"])
- curr += 1
|