| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499 |
- #
- # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- # Use of this file is governed by the BSD 3-clause license that
- # can be found in the LICENSE.txt file in the project root.
- #
- #
- # This enumeration defines the prediction modes available in ANTLR 4 along with
- # utility methods for analyzing configuration sets for conflicts and/or
- # ambiguities.
- from enum import Enum
- from antlr4.atn.ATN import ATN
- from antlr4.atn.ATNConfig import ATNConfig
- from antlr4.atn.ATNConfigSet import ATNConfigSet
- from antlr4.atn.ATNState import RuleStopState
- from antlr4.atn.SemanticContext import SemanticContext
- PredictionMode = None
- class PredictionMode(Enum):
- #
- # The SLL(*) prediction mode. This prediction mode ignores the current
- # parser context when making predictions. This is the fastest prediction
- # mode, and provides correct results for many grammars. This prediction
- # mode is more powerful than the prediction mode provided by ANTLR 3, but
- # may result in syntax errors for grammar and input combinations which are
- # not SLL.
- #
- # <p>
- # When using this prediction mode, the parser will either return a correct
- # parse tree (i.e. the same parse tree that would be returned with the
- # {@link #LL} prediction mode), or it will report a syntax error. If a
- # syntax error is encountered when using the {@link #SLL} prediction mode,
- # it may be due to either an actual syntax error in the input or indicate
- # that the particular combination of grammar and input requires the more
- # powerful {@link #LL} prediction abilities to complete successfully.</p>
- #
- # <p>
- # This prediction mode does not provide any guarantees for prediction
- # behavior for syntactically-incorrect inputs.</p>
- #
- SLL = 0
- #
- # The LL(*) prediction mode. This prediction mode allows the current parser
- # context to be used for resolving SLL conflicts that occur during
- # prediction. This is the fastest prediction mode that guarantees correct
- # parse results for all combinations of grammars with syntactically correct
- # inputs.
- #
- # <p>
- # When using this prediction mode, the parser will make correct decisions
- # for all syntactically-correct grammar and input combinations. However, in
- # cases where the grammar is truly ambiguous this prediction mode might not
- # report a precise answer for <em>exactly which</em> alternatives are
- # ambiguous.</p>
- #
- # <p>
- # This prediction mode does not provide any guarantees for prediction
- # behavior for syntactically-incorrect inputs.</p>
- #
- LL = 1
- #
- # The LL(*) prediction mode with exact ambiguity detection. In addition to
- # the correctness guarantees provided by the {@link #LL} prediction mode,
- # this prediction mode instructs the prediction algorithm to determine the
- # complete and exact set of ambiguous alternatives for every ambiguous
- # decision encountered while parsing.
- #
- # <p>
- # This prediction mode may be used for diagnosing ambiguities during
- # grammar development. Due to the performance overhead of calculating sets
- # of ambiguous alternatives, this prediction mode should be avoided when
- # the exact results are not necessary.</p>
- #
- # <p>
- # This prediction mode does not provide any guarantees for prediction
- # behavior for syntactically-incorrect inputs.</p>
- #
- LL_EXACT_AMBIG_DETECTION = 2
- #
- # Computes the SLL prediction termination condition.
- #
- # <p>
- # This method computes the SLL prediction termination condition for both of
- # the following cases.</p>
- #
- # <ul>
- # <li>The usual SLL+LL fallback upon SLL conflict</li>
- # <li>Pure SLL without LL fallback</li>
- # </ul>
- #
- # <p><strong>COMBINED SLL+LL PARSING</strong></p>
- #
- # <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
- # ensured regardless of how the termination condition is computed by this
- # method. Due to the substantially higher cost of LL prediction, the
- # prediction should only fall back to LL when the additional lookahead
- # cannot lead to a unique SLL prediction.</p>
- #
- # <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
- # conflicting subsets should fall back to full LL, even if the
- # configuration sets don't resolve to the same alternative (e.g.
- # {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
- # configuration, SLL could continue with the hopes that more lookahead will
- # resolve via one of those non-conflicting configurations.</p>
- #
- # <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
- # stops when it sees only conflicting configuration subsets. In contrast,
- # full LL keeps going when there is uncertainty.</p>
- #
- # <p><strong>HEURISTIC</strong></p>
- #
- # <p>As a heuristic, we stop prediction when we see any conflicting subset
- # unless we see a state that only has one alternative associated with it.
- # The single-alt-state thing lets prediction continue upon rules like
- # (otherwise, it would admit defeat too soon):</p>
- #
- # <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
- #
- # <p>When the ATN simulation reaches the state before {@code ';'}, it has a
- # DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
- # {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
- # processing this node because alternative to has another way to continue,
- # via {@code [6|2|[]]}.</p>
- #
- # <p>It also let's us continue for this rule:</p>
- #
- # <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
- #
- # <p>After matching input A, we reach the stop state for rule A, state 1.
- # State 8 is the state right before B. Clearly alternatives 1 and 2
- # conflict and no amount of further lookahead will separate the two.
- # However, alternative 3 will be able to continue and so we do not stop
- # working on this state. In the previous example, we're concerned with
- # states associated with the conflicting alternatives. Here alt 3 is not
- # associated with the conflicting configs, but since we can continue
- # looking for input reasonably, don't declare the state done.</p>
- #
- # <p><strong>PURE SLL PARSING</strong></p>
- #
- # <p>To handle pure SLL parsing, all we have to do is make sure that we
- # combine stack contexts for configurations that differ only by semantic
- # predicate. From there, we can do the usual SLL termination heuristic.</p>
- #
- # <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
- #
- # <p>SLL decisions don't evaluate predicates until after they reach DFA stop
- # states because they need to create the DFA cache that works in all
- # semantic situations. In contrast, full LL evaluates predicates collected
- # during start state computation so it can ignore predicates thereafter.
- # This means that SLL termination detection can totally ignore semantic
- # predicates.</p>
- #
- # <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
- # semantic predicate contexts so we might see two configurations like the
- # following.</p>
- #
- # <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
- #
- # <p>Before testing these configurations against others, we have to merge
- # {@code x} and {@code x'} (without modifying the existing configurations).
- # For example, we test {@code (x+x')==x''} when looking for conflicts in
- # the following configurations.</p>
- #
- # <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
- #
- # <p>If the configuration set has predicates (as indicated by
- # {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
- # the configurations to strip out all of the predicates so that a standard
- # {@link ATNConfigSet} will merge everything ignoring predicates.</p>
- #
- @classmethod
- def hasSLLConflictTerminatingPrediction(cls, mode:PredictionMode, configs:ATNConfigSet):
- # Configs in rule stop states indicate reaching the end of the decision
- # rule (local context) or end of start rule (full context). If all
- # configs meet this condition, then none of the configurations is able
- # to match additional input so we terminate prediction.
- #
- if cls.allConfigsInRuleStopStates(configs):
- return True
- # pure SLL mode parsing
- if mode == PredictionMode.SLL:
- # Don't bother with combining configs from different semantic
- # contexts if we can fail over to full LL; costs more time
- # since we'll often fail over anyway.
- if configs.hasSemanticContext:
- # dup configs, tossing out semantic predicates
- dup = ATNConfigSet()
- for c in configs:
- c = ATNConfig(config=c, semantic=SemanticContext.NONE)
- dup.add(c)
- configs = dup
- # now we have combined contexts for configs with dissimilar preds
- # pure SLL or combined SLL+LL mode parsing
- altsets = cls.getConflictingAltSubsets(configs)
- return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
- # Checks if any configuration in {@code configs} is in a
- # {@link RuleStopState}. Configurations meeting this condition have reached
- # the end of the decision rule (local context) or end of start rule (full
- # context).
- #
- # @param configs the configuration set to test
- # @return {@code true} if any configuration in {@code configs} is in a
- # {@link RuleStopState}, otherwise {@code false}
- @classmethod
- def hasConfigInRuleStopState(cls, configs:ATNConfigSet):
- return any(isinstance(cfg.state, RuleStopState) for cfg in configs)
- # Checks if all configurations in {@code configs} are in a
- # {@link RuleStopState}. Configurations meeting this condition have reached
- # the end of the decision rule (local context) or end of start rule (full
- # context).
- #
- # @param configs the configuration set to test
- # @return {@code true} if all configurations in {@code configs} are in a
- # {@link RuleStopState}, otherwise {@code false}
- @classmethod
- def allConfigsInRuleStopStates(cls, configs:ATNConfigSet):
- return all(isinstance(cfg.state, RuleStopState) for cfg in configs)
- #
- # Full LL prediction termination.
- #
- # <p>Can we stop looking ahead during ATN simulation or is there some
- # uncertainty as to which alternative we will ultimately pick, after
- # consuming more input? Even if there are partial conflicts, we might know
- # that everything is going to resolve to the same minimum alternative. That
- # means we can stop since no more lookahead will change that fact. On the
- # other hand, there might be multiple conflicts that resolve to different
- # minimums. That means we need more look ahead to decide which of those
- # alternatives we should predict.</p>
- #
- # <p>The basic idea is to split the set of configurations {@code C}, into
- # conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
- # non-conflicting configurations. Two configurations conflict if they have
- # identical {@link ATNConfig#state} and {@link ATNConfig#context} values
- # but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
- # and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
- #
- # <p>Reduce these configuration subsets to the set of possible alternatives.
- # You can compute the alternative subsets in one pass as follows:</p>
- #
- # <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
- # {@code C} holding {@code s} and {@code ctx} fixed.</p>
- #
- # <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
- #
- # <pre>
- # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
- # alt and not pred
- # </pre>
- #
- # <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
- #
- # <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
- # {@code s} and {@code ctx}.</p>
- #
- # <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
- # the union of these alternative subsets is a singleton, then no amount of
- # more lookahead will help us. We will always pick that alternative. If,
- # however, there is more than one alternative, then we are uncertain which
- # alternative to predict and must continue looking for resolution. We may
- # or may not discover an ambiguity in the future, even if there are no
- # conflicting subsets this round.</p>
- #
- # <p>The biggest sin is to terminate early because it means we've made a
- # decision but were uncertain as to the eventual outcome. We haven't used
- # enough lookahead. On the other hand, announcing a conflict too late is no
- # big deal; you will still have the conflict. It's just inefficient. It
- # might even look until the end of file.</p>
- #
- # <p>No special consideration for semantic predicates is required because
- # predicates are evaluated on-the-fly for full LL prediction, ensuring that
- # no configuration contains a semantic context during the termination
- # check.</p>
- #
- # <p><strong>CONFLICTING CONFIGS</strong></p>
- #
- # <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
- # when {@code i!=j} but {@code x=x'}. Because we merge all
- # {@code (s, i, _)} configurations together, that means that there are at
- # most {@code n} configurations associated with state {@code s} for
- # {@code n} possible alternatives in the decision. The merged stacks
- # complicate the comparison of configuration contexts {@code x} and
- # {@code x'}. Sam checks to see if one is a subset of the other by calling
- # merge and checking to see if the merged result is either {@code x} or
- # {@code x'}. If the {@code x} associated with lowest alternative {@code i}
- # is the superset, then {@code i} is the only possible prediction since the
- # others resolve to {@code min(i)} as well. However, if {@code x} is
- # associated with {@code j>i} then at least one stack configuration for
- # {@code j} is not in conflict with alternative {@code i}. The algorithm
- # should keep going, looking for more lookahead due to the uncertainty.</p>
- #
- # <p>For simplicity, I'm doing a equality check between {@code x} and
- # {@code x'} that lets the algorithm continue to consume lookahead longer
- # than necessary. The reason I like the equality is of course the
- # simplicity but also because that is the test you need to detect the
- # alternatives that are actually in conflict.</p>
- #
- # <p><strong>CONTINUE/STOP RULE</strong></p>
- #
- # <p>Continue if union of resolved alternative sets from non-conflicting and
- # conflicting alternative subsets has more than one alternative. We are
- # uncertain about which alternative to predict.</p>
- #
- # <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
- # alternatives are still in the running for the amount of input we've
- # consumed at this point. The conflicting sets let us to strip away
- # configurations that won't lead to more states because we resolve
- # conflicts to the configuration with a minimum alternate for the
- # conflicting set.</p>
- #
- # <p><strong>CASES</strong></p>
- #
- # <ul>
- #
- # <li>no conflicts and more than 1 alternative in set => continue</li>
- #
- # <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
- # {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
- # {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
- # {@code {1,3}} => continue
- # </li>
- #
- # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
- # {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
- # {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
- # {@code {1}} => stop and predict 1</li>
- #
- # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
- # {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
- # {@code {1}} = {@code {1}} => stop and predict 1, can announce
- # ambiguity {@code {1,2}}</li>
- #
- # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
- # {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
- # {@code {2}} = {@code {1,2}} => continue</li>
- #
- # <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
- # {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
- # {@code {3}} = {@code {1,3}} => continue</li>
- #
- # </ul>
- #
- # <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
- #
- # <p>If all states report the same conflicting set of alternatives, then we
- # know we have the exact ambiguity set.</p>
- #
- # <p><code>|A_<em>i</em>|>1</code> and
- # <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
- #
- # <p>In other words, we continue examining lookahead until all {@code A_i}
- # have more than one alternative and all {@code A_i} are the same. If
- # {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
- # because the resolved set is {@code {1}}. To determine what the real
- # ambiguity is, we have to know whether the ambiguity is between one and
- # two or one and three so we keep going. We can only stop prediction when
- # we need exact ambiguity detection when the sets look like
- # {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
- #
- @classmethod
- def resolvesToJustOneViableAlt(cls, altsets:list):
- return cls.getSingleViableAlt(altsets)
- #
- # Determines if every alternative subset in {@code altsets} contains more
- # than one alternative.
- #
- # @param altsets a collection of alternative subsets
- # @return {@code true} if every {@link BitSet} in {@code altsets} has
- # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false}
- #
- @classmethod
- def allSubsetsConflict(cls, altsets:list):
- return not cls.hasNonConflictingAltSet(altsets)
- #
- # Determines if any single alternative subset in {@code altsets} contains
- # exactly one alternative.
- #
- # @param altsets a collection of alternative subsets
- # @return {@code true} if {@code altsets} contains a {@link BitSet} with
- # {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
- #
- @classmethod
- def hasNonConflictingAltSet(cls, altsets:list):
- return any(len(alts) == 1 for alts in altsets)
- #
- # Determines if any single alternative subset in {@code altsets} contains
- # more than one alternative.
- #
- # @param altsets a collection of alternative subsets
- # @return {@code true} if {@code altsets} contains a {@link BitSet} with
- # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false}
- #
- @classmethod
- def hasConflictingAltSet(cls, altsets:list):
- return any(len(alts) > 1 for alts in altsets)
- #
- # Determines if every alternative subset in {@code altsets} is equivalent.
- #
- # @param altsets a collection of alternative subsets
- # @return {@code true} if every member of {@code altsets} is equal to the
- # others, otherwise {@code false}
- #
- @classmethod
- def allSubsetsEqual(cls, altsets:list):
- if not altsets:
- return True
- first = next(iter(altsets))
- return all(alts == first for alts in iter(altsets))
- #
- # Returns the unique alternative predicted by all alternative subsets in
- # {@code altsets}. If no such alternative exists, this method returns
- # {@link ATN#INVALID_ALT_NUMBER}.
- #
- # @param altsets a collection of alternative subsets
- #
- @classmethod
- def getUniqueAlt(cls, altsets:list):
- all = cls.getAlts(altsets)
- if len(all)==1:
- return next(iter(all))
- return ATN.INVALID_ALT_NUMBER
- # Gets the complete set of represented alternatives for a collection of
- # alternative subsets. This method returns the union of each {@link BitSet}
- # in {@code altsets}.
- #
- # @param altsets a collection of alternative subsets
- # @return the set of represented alternatives in {@code altsets}
- #
- @classmethod
- def getAlts(cls, altsets:list):
- return set.union(*altsets)
- #
- # This function gets the conflicting alt subsets from a configuration set.
- # For each configuration {@code c} in {@code configs}:
- #
- # <pre>
- # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
- # alt and not pred
- # </pre>
- #
- @classmethod
- def getConflictingAltSubsets(cls, configs:ATNConfigSet):
- configToAlts = dict()
- for c in configs:
- h = hash((c.state.stateNumber, c.context))
- alts = configToAlts.get(h, None)
- if alts is None:
- alts = set()
- configToAlts[h] = alts
- alts.add(c.alt)
- return configToAlts.values()
- #
- # Get a map from state to alt subset from a configuration set. For each
- # configuration {@code c} in {@code configs}:
- #
- # <pre>
- # map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
- # </pre>
- #
- @classmethod
- def getStateToAltMap(cls, configs:ATNConfigSet):
- m = dict()
- for c in configs:
- alts = m.get(c.state, None)
- if alts is None:
- alts = set()
- m[c.state] = alts
- alts.add(c.alt)
- return m
- @classmethod
- def hasStateAssociatedWithOneAlt(cls, configs:ATNConfigSet):
- return any(len(alts) == 1 for alts in cls.getStateToAltMap(configs).values())
- @classmethod
- def getSingleViableAlt(cls, altsets:list):
- viableAlts = set()
- for alts in altsets:
- minAlt = min(alts)
- viableAlts.add(minAlt)
- if len(viableAlts)>1 : # more than 1 viable alt
- return ATN.INVALID_ALT_NUMBER
- return min(viableAlts)
|