| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709 |
- #
- # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- # Use of this file is governed by the BSD 3-clause license that
- # can be found in the LICENSE.txt file in the project root.
- #
- import sys
- from antlr4.IntervalSet import IntervalSet
- from antlr4.Token import Token
- from antlr4.atn.ATNState import ATNState
- from antlr4.error.Errors import RecognitionException, NoViableAltException, InputMismatchException, \
- FailedPredicateException, ParseCancellationException
- # need forward declaration
- Parser = None
class ErrorStrategy(object):
    """Base interface for parser error-handling strategies.

    Every hook defined here does nothing and returns None; concrete
    strategies such as DefaultErrorStrategy override them.
    """

    def reset(self, recognizer:Parser):
        """Hook for resetting the strategy's state; does nothing here."""

    def recoverInline(self, recognizer:Parser):
        """Hook for in-place recovery from a mismatched token; does nothing here."""

    def recover(self, recognizer:Parser, e:RecognitionException):
        """Hook for recovering from the recognition exception e; does nothing here."""

    def sync(self, recognizer:Parser):
        """Hook for resynchronizing the parser with its input; does nothing here."""

    def inErrorRecoveryMode(self, recognizer:Parser):
        """Hook for querying whether recovery is in progress; returns None here."""

    def reportError(self, recognizer:Parser, e:RecognitionException):
        """Hook for reporting the recognition exception e; does nothing here."""
- # This is the default implementation of {@link ANTLRErrorStrategy} used for
- # error reporting and recovery in ANTLR parsers.
- #
- class DefaultErrorStrategy(ErrorStrategy):
def __init__(self):
    """Create a strategy that starts outside error recovery mode with no error history."""
    super().__init__()
    # True while "recovering from an error".  reportError() consults this
    # flag (through inErrorRecoveryMode) so that it does not report
    # multiple error messages for one detected syntax error.
    self.errorRecoveryMode = False
    # Input-stream index of the last error and the parser states recorded
    # by recover() at that index.  Together they are a failsafe against
    # infinite loops where an error is found but no token is consumed
    # during recovery, then another error is found, ad nauseam.
    # -1 / None mean that no error has been recorded.
    self.lastErrorIndex, self.lastErrorStates = -1, None
    # Bookkeeping written by sync().
    self.nextTokensContext = None
    self.nextTokenState = 0
- # <p>The default implementation simply calls {@link #endErrorCondition} to
- # ensure that the handler is not in error recovery mode.</p>
def reset(self, recognizer:Parser):
    """Reset the strategy by delegating to endErrorCondition.

    After the call the handler is not in error recovery mode.

    @param recognizer the parser instance
    """
    self.endErrorCondition(recognizer)
- #
- # This method is called to enter error recovery mode when a recognition
- # exception is reported.
- #
- # @param recognizer the parser instance
- #
def beginErrorCondition(self, recognizer:Parser):
    """Enter error recovery mode by setting the errorRecoveryMode flag.

    @param recognizer the parser instance (not used by this implementation)
    """
    self.errorRecoveryMode = True
def inErrorRecoveryMode(self, recognizer:Parser):
    """Return the current value of the errorRecoveryMode flag.

    @param recognizer the parser instance (not used by this implementation)
    """
    return self.errorRecoveryMode
- #
- # This method is called to leave error recovery mode after recovering from
- # a recognition exception.
- #
- # @param recognizer
- #
def endErrorCondition(self, recognizer:Parser):
    """Leave error recovery mode and forget where the last error happened.

    @param recognizer the parser instance (not used by this implementation)
    """
    self.errorRecoveryMode = False
    # Drop the loop-protection history kept by recover().
    self.lastErrorIndex, self.lastErrorStates = -1, None
- #
- # {@inheritDoc}
- #
- # <p>The default implementation simply calls {@link #endErrorCondition}.</p>
- #
def reportMatch(self, recognizer:Parser):
    """Signal a successful match; simply delegates to endErrorCondition.

    @param recognizer the parser instance
    """
    self.endErrorCondition(recognizer)
- #
- # {@inheritDoc}
- #
- # <p>The default implementation returns immediately if the handler is already
- # in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
- # and dispatches the reporting task based on the runtime type of {@code e}
- # according to the following table.</p>
- #
- # <ul>
- # <li>{@link NoViableAltException}: Dispatches the call to
- # {@link #reportNoViableAlternative}</li>
- # <li>{@link InputMismatchException}: Dispatches the call to
- # {@link #reportInputMismatch}</li>
- # <li>{@link FailedPredicateException}: Dispatches the call to
- # {@link #reportFailedPredicate}</li>
- # <li>All other types: calls {@link Parser#notifyErrorListeners} to report
- # the exception</li>
- # </ul>
- #
def reportError(self, recognizer:Parser, e:RecognitionException):
    """Report the recognition exception e unless an error is already being handled.

    Returns immediately when the handler is in error recovery mode, so a
    single syntax error does not produce a cascade of messages.  Otherwise
    it enters error recovery mode and dispatches on the runtime type of e:

      NoViableAltException      -> reportNoViableAlternative
      InputMismatchException    -> reportInputMismatch
      FailedPredicateException  -> reportFailedPredicate
      anything else             -> recognizer.notifyErrorListeners

    @param recognizer the parser instance
    @param e the recognition exception
    """
    # If we have already reported an error and have not successfully
    # matched a token since, stay quiet: the new error is likely spurious.
    if self.inErrorRecoveryMode(recognizer):
        return
    self.beginErrorCondition(recognizer)
    if isinstance(e, NoViableAltException):
        self.reportNoViableAlternative(recognizer, e)
    elif isinstance(e, InputMismatchException):
        self.reportInputMismatch(recognizer, e)
    elif isinstance(e, FailedPredicateException):
        self.reportFailedPredicate(recognizer, e)
    else:
        # Diagnostic chatter belongs on stderr, not on the program's stdout.
        print("unknown recognition error type: " + type(e).__name__, file=sys.stderr)
        recognizer.notifyErrorListeners(e.message, e.offendingToken, e)
- #
- # {@inheritDoc}
- #
- # <p>The default implementation resynchronizes the parser by consuming tokens
- # until we find one in the resynchronization set--loosely the set of tokens
- # that can follow the current rule.</p>
- #
def recover(self, recognizer:Parser, e:RecognitionException):
    """Resynchronize the parser by consuming tokens up to the recovery set.

    Tokens are discarded until LA(1) is in the set computed by
    getErrorRecoverySet (or EOF is reached).  The exception e itself is not
    inspected.

    @param recognizer the parser instance
    @param e the recognition exception being recovered from
    """
    visitedStates = self.lastErrorStates
    if self.lastErrorIndex == recognizer.getInputStream().index:
        if visitedStates is not None and recognizer.state in visitedStates:
            # Another error at the same token index and in a state we have
            # already recorded: LT(1) must be in the recovery set, so
            # nothing was consumed last time.  Consume one token as a
            # failsafe against an infinite loop.
            recognizer.consume()
    # Remember where this error happened for the next invocation.
    self.lastErrorIndex = recognizer._input.index
    if self.lastErrorStates is None:
        self.lastErrorStates = []
    self.lastErrorStates.append(recognizer.state)
    self.consumeUntil(recognizer, self.getErrorRecoverySet(recognizer))
- # The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
- # that the current lookahead symbol is consistent with what we're expecting
- # at this point in the ATN. You can call this anytime but ANTLR only
- # generates code to check before subrules/loops and each iteration.
- #
- # <p>Implements Jim Idle's magic sync mechanism in closures and optional
- # subrules. E.g.,</p>
- #
- # <pre>
- # a : sync ( stuff sync )* ;
- # sync : {consume to what can follow sync} ;
- # </pre>
- #
- # At the start of a sub rule upon error, {@link #sync} performs single
- # token deletion, if possible. If it can't do that, it bails on the current
- # rule and uses the default error recovery, which consumes until the
- # resynchronization set of the current rule.
- #
- # <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
- # with an empty alternative), then the expected set includes what follows
- # the subrule.</p>
- #
- # <p>During loop iteration, it consumes until it sees a token that can start a
- # sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
- # stay in the loop as long as possible.</p>
- #
- # <p><strong>ORIGINS</strong></p>
- #
- # <p>Previous versions of ANTLR did a poor job of their recovery within loops.
- # A single mismatch token or missing token would force the parser to bail
- # out of the entire rules surrounding the loop. So, for rule</p>
- #
- # <pre>
- # classDef : 'class' ID '{' member* '}'
- # </pre>
- #
- # input with an extra token between members would force the parser to
- # consume until it found the next class definition rather than the next
- # member definition of the current class.
- #
- # <p>This functionality cost a little bit of effort because the parser has to
- # compare token set at the start of the loop and at each iteration. If for
- # some reason speed is suffering for you, you can turn off this
- # functionality by simply overriding this method as a blank { }.</p>
- #
def sync(self, recognizer:Parser):
    """Check that LA(1) is consistent with the current ATN state and resync if not.

    Does nothing while the handler is already in error recovery mode, when
    LA(1) is among the tokens expected in the current state, or when the
    expected set contains Token.EPSILON.  Otherwise the reaction depends on
    the type of the current ATN state:

      * block start / star-block start / plus-block start / star-loop entry:
        try single-token deletion; if that fails, raise
        InputMismatchException.
      * plus/star loop-back: report the unwanted token, then consume until
        a token that is expected here or is in the error recovery set.
      * any other state type: do nothing.

    @param recognizer the parser instance
    @raises InputMismatchException if single-token deletion cannot recover
        at the start of a block or loop
    """
    # If already recovering, don't try to sync.
    if self.inErrorRecoveryMode(recognizer):
        return
    s = recognizer._interp.atn.states[recognizer.state]
    la = recognizer.getTokenStream().LA(1)
    # Try the cheaper subset first; might get lucky.
    nextTokens = recognizer.atn.nextTokens(s)
    if la in nextTokens:
        # The lookahead is expected here: clear the saved context/state.
        self.nextTokensContext = None
        self.nextTokenState = ATNState.INVALID_STATE_NUMBER
        return
    if Token.EPSILON in nextTokens:
        if self.nextTokensContext is None:
            # It's possible the next token won't match; the information
            # tracked by sync is restricted for performance.
            self.nextTokensContext = recognizer._ctx
            # Store into the attribute that __init__ creates and the branch
            # above resets (nextTokenState), not a look-alike name.
            self.nextTokenState = recognizer._stateNumber
        return
    if s.stateType in (ATNState.BLOCK_START, ATNState.STAR_BLOCK_START,
                       ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY):
        # Report the error and recover by deleting one token if possible.
        if self.singleTokenDeletion(recognizer) is not None:
            return
        raise InputMismatchException(recognizer)
    if s.stateType in (ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK):
        self.reportUnwantedToken(recognizer)
        expecting = recognizer.getExpectedTokens()
        whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer))
        self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule)
    # Do nothing if we can't identify the exact kind of ATN state.
- # This is called by {@link #reportError} when the exception is a
- # {@link NoViableAltException}.
- #
- # @see #reportError
- #
- # @param recognizer the parser instance
- # @param e the recognition exception
- #
def reportNoViableAlternative(self, recognizer:Parser, e:NoViableAltException):
    """Report a NoViableAltException to the parser's error listeners.

    The message quotes the input between e.startToken and e.offendingToken,
    "<EOF>" when the start token is EOF, or "<unknown input>" when the
    recognizer has no token stream.

    @param recognizer the parser instance
    @param e the recognition exception
    """
    tokens = recognizer.getTokenStream()
    if tokens is None:
        offendingText = "<unknown input>"
    elif e.startToken.type == Token.EOF:
        offendingText = "<EOF>"
    else:
        offendingText = tokens.getText(e.startToken, e.offendingToken)
    msg = "no viable alternative at input " + self.escapeWSAndQuote(offendingText)
    recognizer.notifyErrorListeners(msg, e.offendingToken, e)
- #
- # This is called by {@link #reportError} when the exception is an
- # {@link InputMismatchException}.
- #
- # @see #reportError
- #
- # @param recognizer the parser instance
- # @param e the recognition exception
- #
def reportInputMismatch(self, recognizer:Parser, e:InputMismatchException):
    """Report an InputMismatchException to the parser's error listeners.

    The message has the form "mismatched input <token> expecting <set>".

    @param recognizer the parser instance
    @param e the recognition exception
    """
    found = self.getTokenErrorDisplay(e.offendingToken)
    wanted = e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames)
    msg = "".join(("mismatched input ", found, " expecting ", wanted))
    recognizer.notifyErrorListeners(msg, e.offendingToken, e)
- #
- # This is called by {@link #reportError} when the exception is a
- # {@link FailedPredicateException}.
- #
- # @see #reportError
- #
- # @param recognizer the parser instance
- # @param e the recognition exception
- #
def reportFailedPredicate(self, recognizer, e):
    """Report a FailedPredicateException to the parser's error listeners.

    The message is "rule <name> <e.message>", where <name> is taken from
    recognizer.ruleNames at the rule index of the current context.

    @param recognizer the parser instance
    @param e the recognition exception
    """
    ruleIndex = recognizer._ctx.getRuleIndex()
    msg = " ".join(("rule", recognizer.ruleNames[ruleIndex], e.message))
    recognizer.notifyErrorListeners(msg, e.offendingToken, e)
- # This method is called to report a syntax error which requires the removal
- # of a token from the input stream. At the time this method is called, the
- # erroneous symbol is current {@code LT(1)} symbol and has not yet been
- # removed from the input stream. When this method returns,
- # {@code recognizer} is in error recovery mode.
- #
- # <p>This method is called when {@link #singleTokenDeletion} identifies
- # single-token deletion as a viable recovery strategy for a mismatched
- # input error.</p>
- #
- # <p>The default implementation simply returns if the handler is already in
- # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
- # enter error recovery mode, followed by calling
- # {@link Parser#notifyErrorListeners}.</p>
- #
- # @param recognizer the parser instance
- #
def reportUnwantedToken(self, recognizer:Parser):
    """Report the current token as extraneous input.

    Returns without reporting when the handler is already in error recovery
    mode; otherwise enters error recovery mode and notifies the error
    listeners with "extraneous input <token> expecting <set>".

    @param recognizer the parser instance
    """
    if self.inErrorRecoveryMode(recognizer):
        return
    self.beginErrorCondition(recognizer)
    unwanted = recognizer.getCurrentToken()
    shown = self.getTokenErrorDisplay(unwanted)
    wanted = self.getExpectedTokens(recognizer).toString(
        recognizer.literalNames, recognizer.symbolicNames)
    msg = "".join(("extraneous input ", shown, " expecting ", wanted))
    recognizer.notifyErrorListeners(msg, unwanted, None)
- # This method is called to report a syntax error which requires the
- # insertion of a missing token into the input stream. At the time this
- # method is called, the missing token has not yet been inserted. When this
- # method returns, {@code recognizer} is in error recovery mode.
- #
- # <p>This method is called when {@link #singleTokenInsertion} identifies
- # single-token insertion as a viable recovery strategy for a mismatched
- # input error.</p>
- #
- # <p>The default implementation simply returns if the handler is already in
- # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
- # enter error recovery mode, followed by calling
- # {@link Parser#notifyErrorListeners}.</p>
- #
- # @param recognizer the parser instance
- #
def reportMissingToken(self, recognizer:Parser):
    """Report that an expected token is missing before the current token.

    Returns without reporting when the handler is already in error recovery
    mode; otherwise enters error recovery mode and notifies the error
    listeners with "missing <set> at <token>".

    @param recognizer the parser instance
    """
    if self.inErrorRecoveryMode(recognizer):
        return
    self.beginErrorCondition(recognizer)
    here = recognizer.getCurrentToken()
    wanted = self.getExpectedTokens(recognizer).toString(
        recognizer.literalNames, recognizer.symbolicNames)
    msg = "".join(("missing ", wanted, " at ", self.getTokenErrorDisplay(here)))
    recognizer.notifyErrorListeners(msg, here, None)
- # <p>The default implementation attempts to recover from the mismatched input
- # by using single token insertion and deletion as described below. If the
- # recovery attempt fails, this method throws an
- # {@link InputMismatchException}.</p>
- #
- # <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
- #
- # <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
- # right token, however, then assume {@code LA(1)} is some extra spurious
- # token and delete it. Then consume and return the next token (which was
- # the {@code LA(2)} token) as the successful result of the match operation.</p>
- #
- # <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
- #
- # <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
- #
- # <p>If current token (at {@code LA(1)}) is consistent with what could come
- # after the expected {@code LA(1)} token, then assume the token is missing
- # and use the parser's {@link TokenFactory} to create it on the fly. The
- # "insertion" is performed by returning the created token as the successful
- # result of the match operation.</p>
- #
- # <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
- #
- # <p><strong>EXAMPLE</strong></p>
- #
- # <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
- # the parser returns from the nested call to {@code expr}, it will have
- # call chain:</p>
- #
- # <pre>
- # stat → expr → atom
- # </pre>
- #
- # and it will be trying to match the {@code ')'} at this point in the
- # derivation:
- #
- # <pre>
- # => ID '=' '(' INT ')' ('+' atom)* ';'
- # ^
- # </pre>
- #
- # The attempt to match {@code ')'} will fail when it sees {@code ';'} and
- # call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
- # is in the set of tokens that can follow the {@code ')'} token reference
- # in rule {@code atom}. It can assume that you forgot the {@code ')'}.
- #
def recoverInline(self, recognizer:Parser):
    """Try to recover from a mismatched token without leaving the current rule.

    Single-token deletion is attempted first, then single-token insertion.

    @param recognizer the parser instance
    @return the token that stands in for the successful match: the real
        token after a deletion, or a conjured one after an insertion
    @raises InputMismatchException if neither strategy applies
    """
    matchedSymbol = self.singleTokenDeletion(recognizer)
    if matchedSymbol is None:
        # Deletion did not help; see whether a token is simply missing.
        if not self.singleTokenInsertion(recognizer):
            # Even that didn't work; must throw the exception.
            raise InputMismatchException(recognizer)
        return self.getMissingSymbol(recognizer)
    # The extra token has been deleted; now move past the expected token
    # as if all were ok.
    recognizer.consume()
    return matchedSymbol
- #
- # This method implements the single-token insertion inline error recovery
- # strategy. It is called by {@link #recoverInline} if the single-token
- # deletion strategy fails to recover from the mismatched input. If this
- # method returns {@code true}, {@code recognizer} will be in error recovery
- # mode.
- #
- # <p>This method determines whether or not single-token insertion is viable by
- # checking if the {@code LA(1)} input symbol could be successfully matched
- # if it were instead the {@code LA(2)} symbol. If this method returns
- # {@code true}, the caller is responsible for creating and inserting a
- # token with the correct type to produce this behavior.</p>
- #
- # @param recognizer the parser instance
- # @return {@code true} if single-token insertion is a viable recovery
- # strategy for the current mismatched input, otherwise {@code false}
- #
def singleTokenInsertion(self, recognizer:Parser):
    """Decide whether pretending a token is missing would let parsing continue.

    Insertion is viable when LA(1) is among the tokens that can follow the
    target of the current ATN state's first transition.  When it is, the
    missing token is reported (which puts the handler in error recovery
    mode); creating the replacement token is left to the caller.

    @param recognizer the parser instance
    @return True if single-token insertion is viable, otherwise False
    """
    la1 = recognizer.getTokenStream().LA(1)
    atn = recognizer._interp.atn
    # State reached if the expected token had been present.
    afterExpected = atn.states[recognizer.state].transitions[0].target
    if la1 not in atn.nextTokens(afterExpected, recognizer._ctx):
        return False
    self.reportMissingToken(recognizer)
    return True
- # This method implements the single-token deletion inline error recovery
- # strategy. It is called by {@link #recoverInline} to attempt to recover
- # from mismatched input. If this method returns null, the parser and error
- # handler state will not have changed. If this method returns non-null,
- # {@code recognizer} will <em>not</em> be in error recovery mode since the
- # returned token was a successful match.
- #
- # <p>If the single-token deletion is successful, this method calls
- # {@link #reportUnwantedToken} to report the error, followed by
- # {@link Parser#consume} to actually "delete" the extraneous token. Then,
- # before returning {@link #reportMatch} is called to signal a successful
- # match.</p>
- #
- # @param recognizer the parser instance
- # @return the successfully matched {@link Token} instance if single-token
- # deletion successfully recovers from the mismatched input, otherwise
- # {@code null}
- #
def singleTokenDeletion(self, recognizer:Parser):
    """Recover by dropping LA(1) when LA(2) is what the parser expects.

    On success the unwanted token is reported and consumed, reportMatch is
    called (leaving error recovery mode), and the token that is now current
    is returned without being consumed.  On failure nothing is reported or
    consumed.

    @param recognizer the parser instance
    @return the token now at the front of the input if deletion worked,
        otherwise None
    """
    la2 = recognizer.getTokenStream().LA(2)
    if la2 not in self.getExpectedTokens(recognizer):
        return None
    self.reportUnwantedToken(recognizer)
    recognizer.consume()  # simply delete the extra token
    # We want to return the token we're actually matching.
    survivor = recognizer.getCurrentToken()
    self.reportMatch(recognizer)  # we know the current token is correct
    return survivor
- # Conjure up a missing token during error recovery.
- #
- # The recognizer attempts to recover from single missing
- # symbols. But, actions might refer to that missing symbol.
- # For example, x=ID {f($x);}. The action clearly assumes
- # that there has been an identifier matched previously and that
- # $x points at that token. If that token is missing, but
- # the next token in the stream is what we want we assume that
- # this token is missing and we keep going. Because we
- # have to return some token to replace the missing token,
- # we have to conjure one up. This method gives the user control
- # over the tokens returned for missing tokens. Mostly,
- # you will want to create something special for identifier
- # tokens. For literals such as '{' and ',', the default
- # action in the parser or tree parser works. It simply creates
- # a CommonToken of the appropriate type. The text will be the token.
- # If you change what tokens must be created by the lexer,
- # override this method to create the appropriate tokens.
- #
def getMissingSymbol(self, recognizer:Parser):
    """Conjure up a token to stand in for one that is missing from the input.

    The token type is the first element of the expected-token set and the
    text is "<missing NAME>", where NAME comes from recognizer.literalNames
    or, when that yields None, recognizer.symbolicNames.  Source, line and
    column are copied from the current token, or from the previous token
    when the current one is EOF and a previous token exists.

    @param recognizer the parser instance
    @return the token built by the recognizer's token factory
    """
    currentSymbol = recognizer.getCurrentToken()
    expectedTokenType = self.getExpectedTokens(recognizer)[0]  # get any element
    if expectedTokenType == Token.EOF:
        tokenText = "<missing EOF>"
    else:
        name = None
        if expectedTokenType < len(recognizer.literalNames):
            name = recognizer.literalNames[expectedTokenType]
        if name is None and expectedTokenType < len(recognizer.symbolicNames):
            name = recognizer.symbolicNames[expectedTokenType]
        tokenText = "<missing " + str(name) + ">"
    # Choose the token that lends its position to the conjured one.
    lookback = recognizer.getTokenStream().LT(-1)
    atEof = currentSymbol.type == Token.EOF
    anchor = lookback if (atEof and lookback is not None) else currentSymbol
    factory = recognizer.getTokenFactory()
    return factory.create(anchor.source,
                          expectedTokenType, tokenText, Token.DEFAULT_CHANNEL,
                          -1, -1, anchor.line, anchor.column)
def getExpectedTokens(self, recognizer:Parser):
    """Return recognizer.getExpectedTokens(); a single place for subclasses to override.

    @param recognizer the parser instance
    """
    return recognizer.getExpectedTokens()
- # How should a token be displayed in an error message? The default
- # is to display just the text, but during development you might
- # want to have a lot of information spit out. Override in that case
- # to use t.toString() (which, for CommonToken, dumps everything about
- # the token). This is better than forcing you to override a method in
- # your token objects because you don't have to go modify your lexer
- # so that it creates a new Java type.
- #
def getTokenErrorDisplay(self, t:Token):
    """Render the token t for use in an error message.

    @param t the token to display, or None
    @return "<no token>" for None; otherwise the token text (or "<EOF>" /
        "<TYPE>" when the token has no text) escaped and quoted by
        escapeWSAndQuote
    """
    if t is None:
        return "<no token>"
    shown = t.text
    if shown is None:
        # No text available: fall back to a symbolic rendering of the type.
        shown = "<EOF>" if t.type == Token.EOF else "<" + str(t.type) + ">"
    return self.escapeWSAndQuote(shown)
def escapeWSAndQuote(self, s:str):
    """Make newline, carriage return and tab visible in s and wrap it in single quotes.

    @param s the text to display
    @return s with "\\n", "\\r" and "\\t" spelled out as two-character
        escapes, surrounded by single quotes
    """
    for raw, visible in (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
        s = s.replace(raw, visible)
    return "'" + s + "'"
- # Compute the error recovery set for the current rule. During
- # rule invocation, the parser pushes the set of tokens that can
- # follow that rule reference on the stack; this amounts to
- # computing FIRST of what follows the rule reference in the
- # enclosing rule. See LinearApproximator.FIRST().
- # This local follow set only includes tokens
- # from within the rule; i.e., the FIRST computation done by
- # ANTLR stops at the end of a rule.
- #
- # EXAMPLE
- #
- # When you find a "no viable alt exception", the input is not
- # consistent with any of the alternatives for rule r. The best
- # thing to do is to consume tokens until you see something that
- # can legally follow a call to r *or* any rule that called r.
- # You don't want the exact set of viable next tokens because the
- # input might just be missing a token--you might consume the
- # rest of the input looking for one of the missing tokens.
- #
- # Consider grammar:
- #
- # a : '[' b ']'
- # | '(' b ')'
- # ;
- # b : c '^' INT ;
- # c : ID
- # | INT
- # ;
- #
- # At each rule invocation, the set of tokens that could follow
- # that rule is pushed on a stack. Here are the various
- # context-sensitive follow sets:
- #
- # FOLLOW(b1_in_a) = FIRST(']') = ']'
- # FOLLOW(b2_in_a) = FIRST(')') = ')'
- # FOLLOW(c_in_b) = FIRST('^') = '^'
- #
- # Upon erroneous input "[]", the call chain is
- #
- # a -> b -> c
- #
- # and, hence, the follow context stack is:
- #
- # depth follow set start of rule execution
- # 0 <EOF> a (from main())
- # 1 ']' b
- # 2 '^' c
- #
- # Notice that ')' is not included, because b would have to have
- # been called from a different context in rule a for ')' to be
- # included.
- #
- # For error recovery, we cannot consider FOLLOW(c)
- # (context-sensitive or otherwise). We need the combined set of
- # all context-sensitive FOLLOW sets--the set of all tokens that
- # could follow any reference in the call chain. We need to
- # resync to one of those tokens. Note that FOLLOW(c)='^' and if
- # we resync'd to that token, we'd consume until EOF. We need to
- # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
- # In this case, for input "[]", LA(1) is ']' and in the set, so we would
- # not consume anything. After printing an error, rule c would
- # return normally. Rule b would not find the required '^' though.
- # At this point, it gets a mismatched token error and throws an
- # exception (since LA(1) is not in the viable following token
- # set). The rule exception handler tries to recover, but finds
- # the same recovery set and doesn't consume anything. Rule b
- # exits normally returning to rule a. Now it finds the ']' (and
- # with the successful match exits errorRecovery mode).
- #
- # So, you can see that the parser walks up the call chain looking
- # for the token that was a member of the recovery set.
- #
- # Errors are not generated in errorRecovery mode.
- #
- # ANTLR's error recovery mechanism is based upon original ideas:
- #
- # "Algorithms + Data Structures = Programs" by Niklaus Wirth
- #
- # and
- #
- # "A note on error recovery in recursive descent parsers":
- # http://portal.acm.org/citation.cfm?id=947902.947905
- #
- # Later, Josef Grosch had some good ideas:
- #
- # "Efficient and Comfortable Error Recovery in Recursive Descent
- # Parsers":
- # ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
- #
- # Like Grosch I implement context-sensitive FOLLOW sets that are combined
- # at run-time upon error to avoid overhead during parsing.
- #
def getErrorRecoverySet(self, recognizer:Parser):
    """Combine the follow sets of every rule invocation on the current call chain.

    Walks from the current context up through parentCtx while the context
    has a non-negative invokingState, adding the tokens that can follow
    each invoking transition.  Token.EPSILON is removed from the result.

    @param recognizer the parser instance
    @return an IntervalSet of token types to resynchronize on
    """
    atn = recognizer._interp.atn
    frame = recognizer._ctx
    recoverSet = IntervalSet()
    while frame is not None and frame.invokingState >= 0:
        # Compute what follows whoever invoked us.
        callSite = atn.states[frame.invokingState]
        recoverSet.addSet(atn.nextTokens(callSite.transitions[0].followState))
        frame = frame.parentCtx
    recoverSet.removeOne(Token.EPSILON)
    return recoverSet
- # Consume tokens until one matches the given token set.
def consumeUntil(self, recognizer:Parser, set_:IntervalSet):
    """Discard tokens until LA(1) is EOF or a member of set_.

    Nothing is consumed when the current lookahead already satisfies the
    stop condition.

    @param recognizer the parser instance
    @param set_ the token types to stop at; the callers in this class pass
        an IntervalSet, and only membership testing ("in") is required
    """
    ttype = recognizer.getTokenStream().LA(1)
    while ttype != Token.EOF and ttype not in set_:
        recognizer.consume()
        ttype = recognizer.getTokenStream().LA(1)
- #
- # This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
- # by immediately canceling the parse operation with a
- # {@link ParseCancellationException}. The implementation ensures that the
- # {@link ParserRuleContext#exception} field is set for all parse tree nodes
- # that were not completed prior to encountering the error.
- #
- # <p>
- # This error strategy is useful in the following scenarios.</p>
- #
- # <ul>
- # <li><strong>Two-stage parsing:</strong> This error strategy allows the first
- # stage of two-stage parsing to immediately terminate if an error is
- # encountered, and immediately fall back to the second stage. In addition to
- # avoiding wasted work by attempting to recover from errors here, the empty
- # implementation of {@link BailErrorStrategy#sync} improves the performance of
- # the first stage.</li>
- # <li><strong>Silent validation:</strong> When syntax errors are not being
- # reported or logged, and the parse result is simply ignored if errors occur,
- # the {@link BailErrorStrategy} avoids wasting work on recovering from errors
- # when the result will be ignored either way.</li>
- # </ul>
- #
- # <p>
- # {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
- #
- # @see Parser#setErrorHandler(ANTLRErrorStrategy)
- #
class BailErrorStrategy(DefaultErrorStrategy):
    """Error strategy that cancels the parse at the first syntax error.

    Instead of recovering, every error is turned into a
    ParseCancellationException, and sync() is disabled.
    """

    def recover(self, recognizer:Parser, e:RecognitionException):
        """Abort the parse instead of recovering from e.

        Records e in the exception attribute of the current context and of
        every ancestor context, then raises ParseCancellationException.
        The original exception is passed as the constructor argument and is
        also chained, so it is available as __cause__.

        @param recognizer the parser instance
        @param e the recognition exception that ends the parse
        @raises ParseCancellationException always
        """
        context = recognizer._ctx
        while context is not None:
            context.exception = e
            context = context.parentCtx
        # Explicit chaining keeps the original error reachable via
        # __cause__ even when this is not called from an except block.
        raise ParseCancellationException(e) from e

    def recoverInline(self, recognizer:Parser):
        """Refuse inline recovery; a successful recovery would not raise.

        Builds an InputMismatchException for the recognizer and hands it to
        recover(), which raises.

        @param recognizer the parser instance
        @raises ParseCancellationException always (via recover)
        """
        self.recover(recognizer, InputMismatchException(recognizer))

    def sync(self, recognizer:Parser):
        """Do nothing, so no recovery is attempted from problems in subrules."""
        pass
- del Parser
|