#
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
#
# This enumeration defines the prediction modes available in ANTLR 4 along with
# utility methods for analyzing configuration sets for conflicts and/or
# ambiguities.
from collections import defaultdict
from enum import Enum

from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import ATNConfig
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.ATNState import RuleStopState
from antlr4.atn.SemanticContext import SemanticContext
  16. PredictionMode = None
  17. class PredictionMode(Enum):
  18. #
  19. # The SLL(*) prediction mode. This prediction mode ignores the current
  20. # parser context when making predictions. This is the fastest prediction
  21. # mode, and provides correct results for many grammars. This prediction
  22. # mode is more powerful than the prediction mode provided by ANTLR 3, but
  23. # may result in syntax errors for grammar and input combinations which are
  24. # not SLL.
  25. #
  26. # <p>
  27. # When using this prediction mode, the parser will either return a correct
  28. # parse tree (i.e. the same parse tree that would be returned with the
  29. # {@link #LL} prediction mode), or it will report a syntax error. If a
  30. # syntax error is encountered when using the {@link #SLL} prediction mode,
  31. # it may be due to either an actual syntax error in the input or indicate
  32. # that the particular combination of grammar and input requires the more
  33. # powerful {@link #LL} prediction abilities to complete successfully.</p>
  34. #
  35. # <p>
  36. # This prediction mode does not provide any guarantees for prediction
  37. # behavior for syntactically-incorrect inputs.</p>
  38. #
  39. SLL = 0
  40. #
  41. # The LL(*) prediction mode. This prediction mode allows the current parser
  42. # context to be used for resolving SLL conflicts that occur during
  43. # prediction. This is the fastest prediction mode that guarantees correct
  44. # parse results for all combinations of grammars with syntactically correct
  45. # inputs.
  46. #
  47. # <p>
  48. # When using this prediction mode, the parser will make correct decisions
  49. # for all syntactically-correct grammar and input combinations. However, in
  50. # cases where the grammar is truly ambiguous this prediction mode might not
  51. # report a precise answer for <em>exactly which</em> alternatives are
  52. # ambiguous.</p>
  53. #
  54. # <p>
  55. # This prediction mode does not provide any guarantees for prediction
  56. # behavior for syntactically-incorrect inputs.</p>
  57. #
  58. LL = 1
  59. #
  60. # The LL(*) prediction mode with exact ambiguity detection. In addition to
  61. # the correctness guarantees provided by the {@link #LL} prediction mode,
  62. # this prediction mode instructs the prediction algorithm to determine the
  63. # complete and exact set of ambiguous alternatives for every ambiguous
  64. # decision encountered while parsing.
  65. #
  66. # <p>
  67. # This prediction mode may be used for diagnosing ambiguities during
  68. # grammar development. Due to the performance overhead of calculating sets
  69. # of ambiguous alternatives, this prediction mode should be avoided when
  70. # the exact results are not necessary.</p>
  71. #
  72. # <p>
  73. # This prediction mode does not provide any guarantees for prediction
  74. # behavior for syntactically-incorrect inputs.</p>
  75. #
  76. LL_EXACT_AMBIG_DETECTION = 2
  77. #
  78. # Computes the SLL prediction termination condition.
  79. #
  80. # <p>
  81. # This method computes the SLL prediction termination condition for both of
  82. # the following cases.</p>
  83. #
  84. # <ul>
  85. # <li>The usual SLL+LL fallback upon SLL conflict</li>
  86. # <li>Pure SLL without LL fallback</li>
  87. # </ul>
  88. #
  89. # <p><strong>COMBINED SLL+LL PARSING</strong></p>
  90. #
  91. # <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
  92. # ensured regardless of how the termination condition is computed by this
  93. # method. Due to the substantially higher cost of LL prediction, the
  94. # prediction should only fall back to LL when the additional lookahead
  95. # cannot lead to a unique SLL prediction.</p>
  96. #
  97. # <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
  98. # conflicting subsets should fall back to full LL, even if the
  99. # configuration sets don't resolve to the same alternative (e.g.
  100. # {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
  101. # configuration, SLL could continue with the hopes that more lookahead will
  102. # resolve via one of those non-conflicting configurations.</p>
  103. #
  104. # <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
  105. # stops when it sees only conflicting configuration subsets. In contrast,
  106. # full LL keeps going when there is uncertainty.</p>
  107. #
  108. # <p><strong>HEURISTIC</strong></p>
  109. #
  110. # <p>As a heuristic, we stop prediction when we see any conflicting subset
  111. # unless we see a state that only has one alternative associated with it.
  112. # The single-alt-state thing lets prediction continue upon rules like
  113. # (otherwise, it would admit defeat too soon):</p>
  114. #
  115. # <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
  116. #
  117. # <p>When the ATN simulation reaches the state before {@code ';'}, it has a
  118. # DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
  119. # {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
  120. # processing this node because alternative to has another way to continue,
  121. # via {@code [6|2|[]]}.</p>
  122. #
  123. # <p>It also let's us continue for this rule:</p>
  124. #
  125. # <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
  126. #
  127. # <p>After matching input A, we reach the stop state for rule A, state 1.
  128. # State 8 is the state right before B. Clearly alternatives 1 and 2
  129. # conflict and no amount of further lookahead will separate the two.
  130. # However, alternative 3 will be able to continue and so we do not stop
  131. # working on this state. In the previous example, we're concerned with
  132. # states associated with the conflicting alternatives. Here alt 3 is not
  133. # associated with the conflicting configs, but since we can continue
  134. # looking for input reasonably, don't declare the state done.</p>
  135. #
  136. # <p><strong>PURE SLL PARSING</strong></p>
  137. #
  138. # <p>To handle pure SLL parsing, all we have to do is make sure that we
  139. # combine stack contexts for configurations that differ only by semantic
  140. # predicate. From there, we can do the usual SLL termination heuristic.</p>
  141. #
  142. # <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
  143. #
  144. # <p>SLL decisions don't evaluate predicates until after they reach DFA stop
  145. # states because they need to create the DFA cache that works in all
  146. # semantic situations. In contrast, full LL evaluates predicates collected
  147. # during start state computation so it can ignore predicates thereafter.
  148. # This means that SLL termination detection can totally ignore semantic
  149. # predicates.</p>
  150. #
  151. # <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
  152. # semantic predicate contexts so we might see two configurations like the
  153. # following.</p>
  154. #
  155. # <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
  156. #
  157. # <p>Before testing these configurations against others, we have to merge
  158. # {@code x} and {@code x'} (without modifying the existing configurations).
  159. # For example, we test {@code (x+x')==x''} when looking for conflicts in
  160. # the following configurations.</p>
  161. #
  162. # <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
  163. #
  164. # <p>If the configuration set has predicates (as indicated by
  165. # {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
  166. # the configurations to strip out all of the predicates so that a standard
  167. # {@link ATNConfigSet} will merge everything ignoring predicates.</p>
  168. #
  169. @classmethod
  170. def hasSLLConflictTerminatingPrediction(cls, mode:PredictionMode, configs:ATNConfigSet):
  171. # Configs in rule stop states indicate reaching the end of the decision
  172. # rule (local context) or end of start rule (full context). If all
  173. # configs meet this condition, then none of the configurations is able
  174. # to match additional input so we terminate prediction.
  175. #
  176. if cls.allConfigsInRuleStopStates(configs):
  177. return True
  178. # pure SLL mode parsing
  179. if mode == PredictionMode.SLL:
  180. # Don't bother with combining configs from different semantic
  181. # contexts if we can fail over to full LL; costs more time
  182. # since we'll often fail over anyway.
  183. if configs.hasSemanticContext:
  184. # dup configs, tossing out semantic predicates
  185. dup = ATNConfigSet()
  186. for c in configs:
  187. c = ATNConfig(config=c, semantic=SemanticContext.NONE)
  188. dup.add(c)
  189. configs = dup
  190. # now we have combined contexts for configs with dissimilar preds
  191. # pure SLL or combined SLL+LL mode parsing
  192. altsets = cls.getConflictingAltSubsets(configs)
  193. return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
  194. # Checks if any configuration in {@code configs} is in a
  195. # {@link RuleStopState}. Configurations meeting this condition have reached
  196. # the end of the decision rule (local context) or end of start rule (full
  197. # context).
  198. #
  199. # @param configs the configuration set to test
  200. # @return {@code true} if any configuration in {@code configs} is in a
  201. # {@link RuleStopState}, otherwise {@code false}
  202. @classmethod
  203. def hasConfigInRuleStopState(cls, configs:ATNConfigSet):
  204. return any(isinstance(cfg.state, RuleStopState) for cfg in configs)
  205. # Checks if all configurations in {@code configs} are in a
  206. # {@link RuleStopState}. Configurations meeting this condition have reached
  207. # the end of the decision rule (local context) or end of start rule (full
  208. # context).
  209. #
  210. # @param configs the configuration set to test
  211. # @return {@code true} if all configurations in {@code configs} are in a
  212. # {@link RuleStopState}, otherwise {@code false}
  213. @classmethod
  214. def allConfigsInRuleStopStates(cls, configs:ATNConfigSet):
  215. return all(isinstance(cfg.state, RuleStopState) for cfg in configs)
  216. #
  217. # Full LL prediction termination.
  218. #
  219. # <p>Can we stop looking ahead during ATN simulation or is there some
  220. # uncertainty as to which alternative we will ultimately pick, after
  221. # consuming more input? Even if there are partial conflicts, we might know
  222. # that everything is going to resolve to the same minimum alternative. That
  223. # means we can stop since no more lookahead will change that fact. On the
  224. # other hand, there might be multiple conflicts that resolve to different
  225. # minimums. That means we need more look ahead to decide which of those
  226. # alternatives we should predict.</p>
  227. #
  228. # <p>The basic idea is to split the set of configurations {@code C}, into
  229. # conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
  230. # non-conflicting configurations. Two configurations conflict if they have
  231. # identical {@link ATNConfig#state} and {@link ATNConfig#context} values
  232. # but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
  233. # and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
  234. #
  235. # <p>Reduce these configuration subsets to the set of possible alternatives.
  236. # You can compute the alternative subsets in one pass as follows:</p>
  237. #
  238. # <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
  239. # {@code C} holding {@code s} and {@code ctx} fixed.</p>
  240. #
  241. # <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
  242. #
  243. # <pre>
  244. # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
  245. # alt and not pred
  246. # </pre>
  247. #
  248. # <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
  249. #
  250. # <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
  251. # {@code s} and {@code ctx}.</p>
  252. #
  253. # <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
  254. # the union of these alternative subsets is a singleton, then no amount of
  255. # more lookahead will help us. We will always pick that alternative. If,
  256. # however, there is more than one alternative, then we are uncertain which
  257. # alternative to predict and must continue looking for resolution. We may
  258. # or may not discover an ambiguity in the future, even if there are no
  259. # conflicting subsets this round.</p>
  260. #
  261. # <p>The biggest sin is to terminate early because it means we've made a
  262. # decision but were uncertain as to the eventual outcome. We haven't used
  263. # enough lookahead. On the other hand, announcing a conflict too late is no
  264. # big deal; you will still have the conflict. It's just inefficient. It
  265. # might even look until the end of file.</p>
  266. #
  267. # <p>No special consideration for semantic predicates is required because
  268. # predicates are evaluated on-the-fly for full LL prediction, ensuring that
  269. # no configuration contains a semantic context during the termination
  270. # check.</p>
  271. #
  272. # <p><strong>CONFLICTING CONFIGS</strong></p>
  273. #
  274. # <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
  275. # when {@code i!=j} but {@code x=x'}. Because we merge all
  276. # {@code (s, i, _)} configurations together, that means that there are at
  277. # most {@code n} configurations associated with state {@code s} for
  278. # {@code n} possible alternatives in the decision. The merged stacks
  279. # complicate the comparison of configuration contexts {@code x} and
  280. # {@code x'}. Sam checks to see if one is a subset of the other by calling
  281. # merge and checking to see if the merged result is either {@code x} or
  282. # {@code x'}. If the {@code x} associated with lowest alternative {@code i}
  283. # is the superset, then {@code i} is the only possible prediction since the
  284. # others resolve to {@code min(i)} as well. However, if {@code x} is
  285. # associated with {@code j>i} then at least one stack configuration for
  286. # {@code j} is not in conflict with alternative {@code i}. The algorithm
  287. # should keep going, looking for more lookahead due to the uncertainty.</p>
  288. #
  289. # <p>For simplicity, I'm doing a equality check between {@code x} and
  290. # {@code x'} that lets the algorithm continue to consume lookahead longer
  291. # than necessary. The reason I like the equality is of course the
  292. # simplicity but also because that is the test you need to detect the
  293. # alternatives that are actually in conflict.</p>
  294. #
  295. # <p><strong>CONTINUE/STOP RULE</strong></p>
  296. #
  297. # <p>Continue if union of resolved alternative sets from non-conflicting and
  298. # conflicting alternative subsets has more than one alternative. We are
  299. # uncertain about which alternative to predict.</p>
  300. #
  301. # <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
  302. # alternatives are still in the running for the amount of input we've
  303. # consumed at this point. The conflicting sets let us to strip away
  304. # configurations that won't lead to more states because we resolve
  305. # conflicts to the configuration with a minimum alternate for the
  306. # conflicting set.</p>
  307. #
  308. # <p><strong>CASES</strong></p>
  309. #
  310. # <ul>
  311. #
  312. # <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
  313. #
  314. # <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
  315. # {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
  316. # {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
  317. # {@code {1,3}} =&gt; continue
  318. # </li>
  319. #
  320. # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
  321. # {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
  322. # {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
  323. # {@code {1}} =&gt; stop and predict 1</li>
  324. #
  325. # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
  326. # {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
  327. # {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
  328. # ambiguity {@code {1,2}}</li>
  329. #
  330. # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
  331. # {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
  332. # {@code {2}} = {@code {1,2}} =&gt; continue</li>
  333. #
  334. # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
  335. # {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
  336. # {@code {3}} = {@code {1,3}} =&gt; continue</li>
  337. #
  338. # </ul>
  339. #
  340. # <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
  341. #
  342. # <p>If all states report the same conflicting set of alternatives, then we
  343. # know we have the exact ambiguity set.</p>
  344. #
  345. # <p><code>|A_<em>i</em>|&gt;1</code> and
  346. # <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
  347. #
  348. # <p>In other words, we continue examining lookahead until all {@code A_i}
  349. # have more than one alternative and all {@code A_i} are the same. If
  350. # {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
  351. # because the resolved set is {@code {1}}. To determine what the real
  352. # ambiguity is, we have to know whether the ambiguity is between one and
  353. # two or one and three so we keep going. We can only stop prediction when
  354. # we need exact ambiguity detection when the sets look like
  355. # {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
  356. #
  357. @classmethod
  358. def resolvesToJustOneViableAlt(cls, altsets:list):
  359. return cls.getSingleViableAlt(altsets)
  360. #
  361. # Determines if every alternative subset in {@code altsets} contains more
  362. # than one alternative.
  363. #
  364. # @param altsets a collection of alternative subsets
  365. # @return {@code true} if every {@link BitSet} in {@code altsets} has
  366. # {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
  367. #
  368. @classmethod
  369. def allSubsetsConflict(cls, altsets:list):
  370. return not cls.hasNonConflictingAltSet(altsets)
  371. #
  372. # Determines if any single alternative subset in {@code altsets} contains
  373. # exactly one alternative.
  374. #
  375. # @param altsets a collection of alternative subsets
  376. # @return {@code true} if {@code altsets} contains a {@link BitSet} with
  377. # {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
  378. #
  379. @classmethod
  380. def hasNonConflictingAltSet(cls, altsets:list):
  381. return any(len(alts) == 1 for alts in altsets)
  382. #
  383. # Determines if any single alternative subset in {@code altsets} contains
  384. # more than one alternative.
  385. #
  386. # @param altsets a collection of alternative subsets
  387. # @return {@code true} if {@code altsets} contains a {@link BitSet} with
  388. # {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
  389. #
  390. @classmethod
  391. def hasConflictingAltSet(cls, altsets:list):
  392. return any(len(alts) > 1 for alts in altsets)
  393. #
  394. # Determines if every alternative subset in {@code altsets} is equivalent.
  395. #
  396. # @param altsets a collection of alternative subsets
  397. # @return {@code true} if every member of {@code altsets} is equal to the
  398. # others, otherwise {@code false}
  399. #
  400. @classmethod
  401. def allSubsetsEqual(cls, altsets:list):
  402. if not altsets:
  403. return True
  404. first = next(iter(altsets))
  405. return all(alts == first for alts in iter(altsets))
  406. #
  407. # Returns the unique alternative predicted by all alternative subsets in
  408. # {@code altsets}. If no such alternative exists, this method returns
  409. # {@link ATN#INVALID_ALT_NUMBER}.
  410. #
  411. # @param altsets a collection of alternative subsets
  412. #
  413. @classmethod
  414. def getUniqueAlt(cls, altsets:list):
  415. all = cls.getAlts(altsets)
  416. if len(all)==1:
  417. return next(iter(all))
  418. return ATN.INVALID_ALT_NUMBER
  419. # Gets the complete set of represented alternatives for a collection of
  420. # alternative subsets. This method returns the union of each {@link BitSet}
  421. # in {@code altsets}.
  422. #
  423. # @param altsets a collection of alternative subsets
  424. # @return the set of represented alternatives in {@code altsets}
  425. #
  426. @classmethod
  427. def getAlts(cls, altsets:list):
  428. return set.union(*altsets)
  429. #
  430. # This function gets the conflicting alt subsets from a configuration set.
  431. # For each configuration {@code c} in {@code configs}:
  432. #
  433. # <pre>
  434. # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
  435. # alt and not pred
  436. # </pre>
  437. #
  438. @classmethod
  439. def getConflictingAltSubsets(cls, configs:ATNConfigSet):
  440. configToAlts = dict()
  441. for c in configs:
  442. h = hash((c.state.stateNumber, c.context))
  443. alts = configToAlts.get(h, None)
  444. if alts is None:
  445. alts = set()
  446. configToAlts[h] = alts
  447. alts.add(c.alt)
  448. return configToAlts.values()
  449. #
  450. # Get a map from state to alt subset from a configuration set. For each
  451. # configuration {@code c} in {@code configs}:
  452. #
  453. # <pre>
  454. # map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
  455. # </pre>
  456. #
  457. @classmethod
  458. def getStateToAltMap(cls, configs:ATNConfigSet):
  459. m = dict()
  460. for c in configs:
  461. alts = m.get(c.state, None)
  462. if alts is None:
  463. alts = set()
  464. m[c.state] = alts
  465. alts.add(c.alt)
  466. return m
  467. @classmethod
  468. def hasStateAssociatedWithOneAlt(cls, configs:ATNConfigSet):
  469. return any(len(alts) == 1 for alts in cls.getStateToAltMap(configs).values())
  470. @classmethod
  471. def getSingleViableAlt(cls, altsets:list):
  472. viableAlts = set()
  473. for alts in altsets:
  474. minAlt = min(alts)
  475. viableAlts.add(minAlt)
  476. if len(viableAlts)>1 : # more than 1 viable alt
  477. return ATN.INVALID_ALT_NUMBER
  478. return min(viableAlts)