ErrorStrategy.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709
  1. #
  2. # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  3. # Use of this file is governed by the BSD 3-clause license that
  4. # can be found in the LICENSE.txt file in the project root.
  5. #
  6. import sys
  7. from antlr4.IntervalSet import IntervalSet
  8. from antlr4.Token import Token
  9. from antlr4.atn.ATNState import ATNState
  10. from antlr4.error.Errors import RecognitionException, NoViableAltException, InputMismatchException, \
  11. FailedPredicateException, ParseCancellationException
  12. # need forward declaration
  13. Parser = None
  14. class ErrorStrategy(object):
  15. def reset(self, recognizer:Parser):
  16. pass
  17. def recoverInline(self, recognizer:Parser):
  18. pass
  19. def recover(self, recognizer:Parser, e:RecognitionException):
  20. pass
  21. def sync(self, recognizer:Parser):
  22. pass
  23. def inErrorRecoveryMode(self, recognizer:Parser):
  24. pass
  25. def reportError(self, recognizer:Parser, e:RecognitionException):
  26. pass
  27. # This is the default implementation of {@link ANTLRErrorStrategy} used for
  28. # error reporting and recovery in ANTLR parsers.
  29. #
  30. class DefaultErrorStrategy(ErrorStrategy):
  31. def __init__(self):
  32. super().__init__()
  33. # Indicates whether the error strategy is currently "recovering from an
  34. # error". This is used to suppress reporting multiple error messages while
  35. # attempting to recover from a detected syntax error.
  36. #
  37. # @see #inErrorRecoveryMode
  38. #
  39. self.errorRecoveryMode = False
  40. # The index into the input stream where the last error occurred.
  41. # This is used to prevent infinite loops where an error is found
  42. # but no token is consumed during recovery...another error is found,
  43. # ad nauseum. This is a failsafe mechanism to guarantee that at least
  44. # one token/tree node is consumed for two errors.
  45. #
  46. self.lastErrorIndex = -1
  47. self.lastErrorStates = None
  48. self.nextTokensContext = None
  49. self.nextTokenState = 0
  50. # <p>The default implementation simply calls {@link #endErrorCondition} to
  51. # ensure that the handler is not in error recovery mode.</p>
  52. def reset(self, recognizer:Parser):
  53. self.endErrorCondition(recognizer)
  54. #
  55. # This method is called to enter error recovery mode when a recognition
  56. # exception is reported.
  57. #
  58. # @param recognizer the parser instance
  59. #
  60. def beginErrorCondition(self, recognizer:Parser):
  61. self.errorRecoveryMode = True
  62. def inErrorRecoveryMode(self, recognizer:Parser):
  63. return self.errorRecoveryMode
  64. #
  65. # This method is called to leave error recovery mode after recovering from
  66. # a recognition exception.
  67. #
  68. # @param recognizer
  69. #
  70. def endErrorCondition(self, recognizer:Parser):
  71. self.errorRecoveryMode = False
  72. self.lastErrorStates = None
  73. self.lastErrorIndex = -1
  74. #
  75. # {@inheritDoc}
  76. #
  77. # <p>The default implementation simply calls {@link #endErrorCondition}.</p>
  78. #
  79. def reportMatch(self, recognizer:Parser):
  80. self.endErrorCondition(recognizer)
  81. #
  82. # {@inheritDoc}
  83. #
  84. # <p>The default implementation returns immediately if the handler is already
  85. # in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
  86. # and dispatches the reporting task based on the runtime type of {@code e}
  87. # according to the following table.</p>
  88. #
  89. # <ul>
  90. # <li>{@link NoViableAltException}: Dispatches the call to
  91. # {@link #reportNoViableAlternative}</li>
  92. # <li>{@link InputMismatchException}: Dispatches the call to
  93. # {@link #reportInputMismatch}</li>
  94. # <li>{@link FailedPredicateException}: Dispatches the call to
  95. # {@link #reportFailedPredicate}</li>
  96. # <li>All other types: calls {@link Parser#notifyErrorListeners} to report
  97. # the exception</li>
  98. # </ul>
  99. #
  100. def reportError(self, recognizer:Parser, e:RecognitionException):
  101. # if we've already reported an error and have not matched a token
  102. # yet successfully, don't report any errors.
  103. if self.inErrorRecoveryMode(recognizer):
  104. return # don't report spurious errors
  105. self.beginErrorCondition(recognizer)
  106. if isinstance( e, NoViableAltException ):
  107. self.reportNoViableAlternative(recognizer, e)
  108. elif isinstance( e, InputMismatchException ):
  109. self.reportInputMismatch(recognizer, e)
  110. elif isinstance( e, FailedPredicateException ):
  111. self.reportFailedPredicate(recognizer, e)
  112. else:
  113. print("unknown recognition error type: " + type(e).__name__)
  114. recognizer.notifyErrorListeners(e.message, e.offendingToken, e)
  115. #
  116. # {@inheritDoc}
  117. #
  118. # <p>The default implementation resynchronizes the parser by consuming tokens
  119. # until we find one in the resynchronization set--loosely the set of tokens
  120. # that can follow the current rule.</p>
  121. #
  122. def recover(self, recognizer:Parser, e:RecognitionException):
  123. if self.lastErrorIndex==recognizer.getInputStream().index \
  124. and self.lastErrorStates is not None \
  125. and recognizer.state in self.lastErrorStates:
  126. # uh oh, another error at same token index and previously-visited
  127. # state in ATN; must be a case where LT(1) is in the recovery
  128. # token set so nothing got consumed. Consume a single token
  129. # at least to prevent an infinite loop; this is a failsafe.
  130. recognizer.consume()
  131. self.lastErrorIndex = recognizer._input.index
  132. if self.lastErrorStates is None:
  133. self.lastErrorStates = []
  134. self.lastErrorStates.append(recognizer.state)
  135. followSet = self.getErrorRecoverySet(recognizer)
  136. self.consumeUntil(recognizer, followSet)
  137. # The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
  138. # that the current lookahead symbol is consistent with what were expecting
  139. # at this point in the ATN. You can call this anytime but ANTLR only
  140. # generates code to check before subrules/loops and each iteration.
  141. #
  142. # <p>Implements Jim Idle's magic sync mechanism in closures and optional
  143. # subrules. E.g.,</p>
  144. #
  145. # <pre>
  146. # a : sync ( stuff sync )* ;
  147. # sync : {consume to what can follow sync} ;
  148. # </pre>
  149. #
  150. # At the start of a sub rule upon error, {@link #sync} performs single
  151. # token deletion, if possible. If it can't do that, it bails on the current
  152. # rule and uses the default error recovery, which consumes until the
  153. # resynchronization set of the current rule.
  154. #
  155. # <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
  156. # with an empty alternative), then the expected set includes what follows
  157. # the subrule.</p>
  158. #
  159. # <p>During loop iteration, it consumes until it sees a token that can start a
  160. # sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
  161. # stay in the loop as long as possible.</p>
  162. #
  163. # <p><strong>ORIGINS</strong></p>
  164. #
  165. # <p>Previous versions of ANTLR did a poor job of their recovery within loops.
  166. # A single mismatch token or missing token would force the parser to bail
  167. # out of the entire rules surrounding the loop. So, for rule</p>
  168. #
  169. # <pre>
  170. # classDef : 'class' ID '{' member* '}'
  171. # </pre>
  172. #
  173. # input with an extra token between members would force the parser to
  174. # consume until it found the next class definition rather than the next
  175. # member definition of the current class.
  176. #
  177. # <p>This functionality cost a little bit of effort because the parser has to
  178. # compare token set at the start of the loop and at each iteration. If for
  179. # some reason speed is suffering for you, you can turn off this
  180. # functionality by simply overriding this method as a blank { }.</p>
  181. #
  182. def sync(self, recognizer:Parser):
  183. # If already recovering, don't try to sync
  184. if self.inErrorRecoveryMode(recognizer):
  185. return
  186. s = recognizer._interp.atn.states[recognizer.state]
  187. la = recognizer.getTokenStream().LA(1)
  188. # try cheaper subset first; might get lucky. seems to shave a wee bit off
  189. nextTokens = recognizer.atn.nextTokens(s)
  190. if la in nextTokens:
  191. self.nextTokensContext = None
  192. self.nextTokenState = ATNState.INVALID_STATE_NUMBER
  193. return
  194. elif Token.EPSILON in nextTokens:
  195. if self.nextTokensContext is None:
  196. # It's possible the next token won't match information tracked
  197. # by sync is restricted for performance.
  198. self.nextTokensContext = recognizer._ctx
  199. self.nextTokensState = recognizer._stateNumber
  200. return
  201. if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START,
  202. ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]:
  203. # report error and recover if possible
  204. if self.singleTokenDeletion(recognizer)is not None:
  205. return
  206. else:
  207. raise InputMismatchException(recognizer)
  208. elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]:
  209. self.reportUnwantedToken(recognizer)
  210. expecting = recognizer.getExpectedTokens()
  211. whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer))
  212. self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule)
  213. else:
  214. # do nothing if we can't identify the exact kind of ATN state
  215. pass
  216. # This is called by {@link #reportError} when the exception is a
  217. # {@link NoViableAltException}.
  218. #
  219. # @see #reportError
  220. #
  221. # @param recognizer the parser instance
  222. # @param e the recognition exception
  223. #
  224. def reportNoViableAlternative(self, recognizer:Parser, e:NoViableAltException):
  225. tokens = recognizer.getTokenStream()
  226. if tokens is not None:
  227. if e.startToken.type==Token.EOF:
  228. input = "<EOF>"
  229. else:
  230. input = tokens.getText(e.startToken, e.offendingToken)
  231. else:
  232. input = "<unknown input>"
  233. msg = "no viable alternative at input " + self.escapeWSAndQuote(input)
  234. recognizer.notifyErrorListeners(msg, e.offendingToken, e)
  235. #
  236. # This is called by {@link #reportError} when the exception is an
  237. # {@link InputMismatchException}.
  238. #
  239. # @see #reportError
  240. #
  241. # @param recognizer the parser instance
  242. # @param e the recognition exception
  243. #
  244. def reportInputMismatch(self, recognizer:Parser, e:InputMismatchException):
  245. msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \
  246. + " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames)
  247. recognizer.notifyErrorListeners(msg, e.offendingToken, e)
  248. #
  249. # This is called by {@link #reportError} when the exception is a
  250. # {@link FailedPredicateException}.
  251. #
  252. # @see #reportError
  253. #
  254. # @param recognizer the parser instance
  255. # @param e the recognition exception
  256. #
  257. def reportFailedPredicate(self, recognizer, e):
  258. ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()]
  259. msg = "rule " + ruleName + " " + e.message
  260. recognizer.notifyErrorListeners(msg, e.offendingToken, e)
  261. # This method is called to report a syntax error which requires the removal
  262. # of a token from the input stream. At the time this method is called, the
  263. # erroneous symbol is current {@code LT(1)} symbol and has not yet been
  264. # removed from the input stream. When this method returns,
  265. # {@code recognizer} is in error recovery mode.
  266. #
  267. # <p>This method is called when {@link #singleTokenDeletion} identifies
  268. # single-token deletion as a viable recovery strategy for a mismatched
  269. # input error.</p>
  270. #
  271. # <p>The default implementation simply returns if the handler is already in
  272. # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
  273. # enter error recovery mode, followed by calling
  274. # {@link Parser#notifyErrorListeners}.</p>
  275. #
  276. # @param recognizer the parser instance
  277. #
  278. def reportUnwantedToken(self, recognizer:Parser):
  279. if self.inErrorRecoveryMode(recognizer):
  280. return
  281. self.beginErrorCondition(recognizer)
  282. t = recognizer.getCurrentToken()
  283. tokenName = self.getTokenErrorDisplay(t)
  284. expecting = self.getExpectedTokens(recognizer)
  285. msg = "extraneous input " + tokenName + " expecting " \
  286. + expecting.toString(recognizer.literalNames, recognizer.symbolicNames)
  287. recognizer.notifyErrorListeners(msg, t, None)
  288. # This method is called to report a syntax error which requires the
  289. # insertion of a missing token into the input stream. At the time this
  290. # method is called, the missing token has not yet been inserted. When this
  291. # method returns, {@code recognizer} is in error recovery mode.
  292. #
  293. # <p>This method is called when {@link #singleTokenInsertion} identifies
  294. # single-token insertion as a viable recovery strategy for a mismatched
  295. # input error.</p>
  296. #
  297. # <p>The default implementation simply returns if the handler is already in
  298. # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
  299. # enter error recovery mode, followed by calling
  300. # {@link Parser#notifyErrorListeners}.</p>
  301. #
  302. # @param recognizer the parser instance
  303. #
  304. def reportMissingToken(self, recognizer:Parser):
  305. if self.inErrorRecoveryMode(recognizer):
  306. return
  307. self.beginErrorCondition(recognizer)
  308. t = recognizer.getCurrentToken()
  309. expecting = self.getExpectedTokens(recognizer)
  310. msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \
  311. + " at " + self.getTokenErrorDisplay(t)
  312. recognizer.notifyErrorListeners(msg, t, None)
  313. # <p>The default implementation attempts to recover from the mismatched input
  314. # by using single token insertion and deletion as described below. If the
  315. # recovery attempt fails, this method throws an
  316. # {@link InputMismatchException}.</p>
  317. #
  318. # <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
  319. #
  320. # <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
  321. # right token, however, then assume {@code LA(1)} is some extra spurious
  322. # token and delete it. Then consume and return the next token (which was
  323. # the {@code LA(2)} token) as the successful result of the match operation.</p>
  324. #
  325. # <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
  326. #
  327. # <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
  328. #
  329. # <p>If current token (at {@code LA(1)}) is consistent with what could come
  330. # after the expected {@code LA(1)} token, then assume the token is missing
  331. # and use the parser's {@link TokenFactory} to create it on the fly. The
  332. # "insertion" is performed by returning the created token as the successful
  333. # result of the match operation.</p>
  334. #
  335. # <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
  336. #
  337. # <p><strong>EXAMPLE</strong></p>
  338. #
  339. # <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
  340. # the parser returns from the nested call to {@code expr}, it will have
  341. # call chain:</p>
  342. #
  343. # <pre>
  344. # stat &rarr; expr &rarr; atom
  345. # </pre>
  346. #
  347. # and it will be trying to match the {@code ')'} at this point in the
  348. # derivation:
  349. #
  350. # <pre>
  351. # =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
  352. # ^
  353. # </pre>
  354. #
  355. # The attempt to match {@code ')'} will fail when it sees {@code ';'} and
  356. # call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
  357. # is in the set of tokens that can follow the {@code ')'} token reference
  358. # in rule {@code atom}. It can assume that you forgot the {@code ')'}.
  359. #
  360. def recoverInline(self, recognizer:Parser):
  361. # SINGLE TOKEN DELETION
  362. matchedSymbol = self.singleTokenDeletion(recognizer)
  363. if matchedSymbol is not None:
  364. # we have deleted the extra token.
  365. # now, move past ttype token as if all were ok
  366. recognizer.consume()
  367. return matchedSymbol
  368. # SINGLE TOKEN INSERTION
  369. if self.singleTokenInsertion(recognizer):
  370. return self.getMissingSymbol(recognizer)
  371. # even that didn't work; must throw the exception
  372. raise InputMismatchException(recognizer)
  373. #
  374. # This method implements the single-token insertion inline error recovery
  375. # strategy. It is called by {@link #recoverInline} if the single-token
  376. # deletion strategy fails to recover from the mismatched input. If this
  377. # method returns {@code true}, {@code recognizer} will be in error recovery
  378. # mode.
  379. #
  380. # <p>This method determines whether or not single-token insertion is viable by
  381. # checking if the {@code LA(1)} input symbol could be successfully matched
  382. # if it were instead the {@code LA(2)} symbol. If this method returns
  383. # {@code true}, the caller is responsible for creating and inserting a
  384. # token with the correct type to produce this behavior.</p>
  385. #
  386. # @param recognizer the parser instance
  387. # @return {@code true} if single-token insertion is a viable recovery
  388. # strategy for the current mismatched input, otherwise {@code false}
  389. #
  390. def singleTokenInsertion(self, recognizer:Parser):
  391. currentSymbolType = recognizer.getTokenStream().LA(1)
  392. # if current token is consistent with what could come after current
  393. # ATN state, then we know we're missing a token; error recovery
  394. # is free to conjure up and insert the missing token
  395. atn = recognizer._interp.atn
  396. currentState = atn.states[recognizer.state]
  397. next = currentState.transitions[0].target
  398. expectingAtLL2 = atn.nextTokens(next, recognizer._ctx)
  399. if currentSymbolType in expectingAtLL2:
  400. self.reportMissingToken(recognizer)
  401. return True
  402. else:
  403. return False
  404. # This method implements the single-token deletion inline error recovery
  405. # strategy. It is called by {@link #recoverInline} to attempt to recover
  406. # from mismatched input. If this method returns null, the parser and error
  407. # handler state will not have changed. If this method returns non-null,
  408. # {@code recognizer} will <em>not</em> be in error recovery mode since the
  409. # returned token was a successful match.
  410. #
  411. # <p>If the single-token deletion is successful, this method calls
  412. # {@link #reportUnwantedToken} to report the error, followed by
  413. # {@link Parser#consume} to actually "delete" the extraneous token. Then,
  414. # before returning {@link #reportMatch} is called to signal a successful
  415. # match.</p>
  416. #
  417. # @param recognizer the parser instance
  418. # @return the successfully matched {@link Token} instance if single-token
  419. # deletion successfully recovers from the mismatched input, otherwise
  420. # {@code null}
  421. #
  422. def singleTokenDeletion(self, recognizer:Parser):
  423. nextTokenType = recognizer.getTokenStream().LA(2)
  424. expecting = self.getExpectedTokens(recognizer)
  425. if nextTokenType in expecting:
  426. self.reportUnwantedToken(recognizer)
  427. # print("recoverFromMismatchedToken deleting " \
  428. # + str(recognizer.getTokenStream().LT(1)) \
  429. # + " since " + str(recognizer.getTokenStream().LT(2)) \
  430. # + " is what we want", file=sys.stderr)
  431. recognizer.consume() # simply delete extra token
  432. # we want to return the token we're actually matching
  433. matchedSymbol = recognizer.getCurrentToken()
  434. self.reportMatch(recognizer) # we know current token is correct
  435. return matchedSymbol
  436. else:
  437. return None
  438. # Conjure up a missing token during error recovery.
  439. #
  440. # The recognizer attempts to recover from single missing
  441. # symbols. But, actions might refer to that missing symbol.
  442. # For example, x=ID {f($x);}. The action clearly assumes
  443. # that there has been an identifier matched previously and that
  444. # $x points at that token. If that token is missing, but
  445. # the next token in the stream is what we want we assume that
  446. # this token is missing and we keep going. Because we
  447. # have to return some token to replace the missing token,
  448. # we have to conjure one up. This method gives the user control
  449. # over the tokens returned for missing tokens. Mostly,
  450. # you will want to create something special for identifier
  451. # tokens. For literals such as '{' and ',', the default
  452. # action in the parser or tree parser works. It simply creates
  453. # a CommonToken of the appropriate type. The text will be the token.
  454. # If you change what tokens must be created by the lexer,
  455. # override this method to create the appropriate tokens.
  456. #
  457. def getMissingSymbol(self, recognizer:Parser):
  458. currentSymbol = recognizer.getCurrentToken()
  459. expecting = self.getExpectedTokens(recognizer)
  460. expectedTokenType = expecting[0] # get any element
  461. if expectedTokenType==Token.EOF:
  462. tokenText = "<missing EOF>"
  463. else:
  464. name = None
  465. if expectedTokenType < len(recognizer.literalNames):
  466. name = recognizer.literalNames[expectedTokenType]
  467. if name is None and expectedTokenType < len(recognizer.symbolicNames):
  468. name = recognizer.symbolicNames[expectedTokenType]
  469. tokenText = "<missing " + str(name) + ">"
  470. current = currentSymbol
  471. lookback = recognizer.getTokenStream().LT(-1)
  472. if current.type==Token.EOF and lookback is not None:
  473. current = lookback
  474. return recognizer.getTokenFactory().create(current.source,
  475. expectedTokenType, tokenText, Token.DEFAULT_CHANNEL,
  476. -1, -1, current.line, current.column)
  477. def getExpectedTokens(self, recognizer:Parser):
  478. return recognizer.getExpectedTokens()
  479. # How should a token be displayed in an error message? The default
  480. # is to display just the text, but during development you might
  481. # want to have a lot of information spit out. Override in that case
  482. # to use t.toString() (which, for CommonToken, dumps everything about
  483. # the token). This is better than forcing you to override a method in
  484. # your token objects because you don't have to go modify your lexer
  485. # so that it creates a new Java type.
  486. #
  487. def getTokenErrorDisplay(self, t:Token):
  488. if t is None:
  489. return "<no token>"
  490. s = t.text
  491. if s is None:
  492. if t.type==Token.EOF:
  493. s = "<EOF>"
  494. else:
  495. s = "<" + str(t.type) + ">"
  496. return self.escapeWSAndQuote(s)
  497. def escapeWSAndQuote(self, s:str):
  498. s = s.replace("\n","\\n")
  499. s = s.replace("\r","\\r")
  500. s = s.replace("\t","\\t")
  501. return "'" + s + "'"
  502. # Compute the error recovery set for the current rule. During
  503. # rule invocation, the parser pushes the set of tokens that can
  504. # follow that rule reference on the stack; this amounts to
  505. # computing FIRST of what follows the rule reference in the
  506. # enclosing rule. See LinearApproximator.FIRST().
  507. # This local follow set only includes tokens
  508. # from within the rule; i.e., the FIRST computation done by
  509. # ANTLR stops at the end of a rule.
  510. #
  511. # EXAMPLE
  512. #
  513. # When you find a "no viable alt exception", the input is not
  514. # consistent with any of the alternatives for rule r. The best
  515. # thing to do is to consume tokens until you see something that
  516. # can legally follow a call to r#or* any rule that called r.
  517. # You don't want the exact set of viable next tokens because the
  518. # input might just be missing a token--you might consume the
  519. # rest of the input looking for one of the missing tokens.
  520. #
  521. # Consider grammar:
  522. #
  523. # a : '[' b ']'
  524. # | '(' b ')'
  525. # ;
  526. # b : c '^' INT ;
  527. # c : ID
  528. # | INT
  529. # ;
  530. #
  531. # At each rule invocation, the set of tokens that could follow
  532. # that rule is pushed on a stack. Here are the various
  533. # context-sensitive follow sets:
  534. #
  535. # FOLLOW(b1_in_a) = FIRST(']') = ']'
  536. # FOLLOW(b2_in_a) = FIRST(')') = ')'
  537. # FOLLOW(c_in_b) = FIRST('^') = '^'
  538. #
  539. # Upon erroneous input "[]", the call chain is
  540. #
  541. # a -> b -> c
  542. #
  543. # and, hence, the follow context stack is:
  544. #
  545. # depth follow set start of rule execution
  546. # 0 <EOF> a (from main())
  547. # 1 ']' b
  548. # 2 '^' c
  549. #
  550. # Notice that ')' is not included, because b would have to have
  551. # been called from a different context in rule a for ')' to be
  552. # included.
  553. #
  554. # For error recovery, we cannot consider FOLLOW(c)
  555. # (context-sensitive or otherwise). We need the combined set of
  556. # all context-sensitive FOLLOW sets--the set of all tokens that
  557. # could follow any reference in the call chain. We need to
  558. # resync to one of those tokens. Note that FOLLOW(c)='^' and if
  559. # we resync'd to that token, we'd consume until EOF. We need to
  560. # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
  561. # In this case, for input "[]", LA(1) is ']' and in the set, so we would
  562. # not consume anything. After printing an error, rule c would
  563. # return normally. Rule b would not find the required '^' though.
  564. # At this point, it gets a mismatched token error and throws an
  565. # exception (since LA(1) is not in the viable following token
  566. # set). The rule exception handler tries to recover, but finds
  567. # the same recovery set and doesn't consume anything. Rule b
  568. # exits normally returning to rule a. Now it finds the ']' (and
  569. # with the successful match exits errorRecovery mode).
  570. #
  571. # So, you can see that the parser walks up the call chain looking
  572. # for the token that was a member of the recovery set.
  573. #
  574. # Errors are not generated in errorRecovery mode.
  575. #
  576. # ANTLR's error recovery mechanism is based upon original ideas:
  577. #
  578. # "Algorithms + Data Structures = Programs" by Niklaus Wirth
  579. #
  580. # and
  581. #
  582. # "A note on error recovery in recursive descent parsers":
  583. # http:#portal.acm.org/citation.cfm?id=947902.947905
  584. #
  585. # Later, Josef Grosch had some good ideas:
  586. #
  587. # "Efficient and Comfortable Error Recovery in Recursive Descent
  588. # Parsers":
  589. # ftp:#www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
  590. #
  591. # Like Grosch I implement context-sensitive FOLLOW sets that are combined
  592. # at run-time upon error to avoid overhead during parsing.
  593. #
  594. def getErrorRecoverySet(self, recognizer:Parser):
  595. atn = recognizer._interp.atn
  596. ctx = recognizer._ctx
  597. recoverSet = IntervalSet()
  598. while ctx is not None and ctx.invokingState>=0:
  599. # compute what follows who invoked us
  600. invokingState = atn.states[ctx.invokingState]
  601. rt = invokingState.transitions[0]
  602. follow = atn.nextTokens(rt.followState)
  603. recoverSet.addSet(follow)
  604. ctx = ctx.parentCtx
  605. recoverSet.removeOne(Token.EPSILON)
  606. return recoverSet
  607. # Consume tokens until one matches the given token set.#
  608. def consumeUntil(self, recognizer:Parser, set_:set):
  609. ttype = recognizer.getTokenStream().LA(1)
  610. while ttype != Token.EOF and not ttype in set_:
  611. recognizer.consume()
  612. ttype = recognizer.getTokenStream().LA(1)
  613. #
  614. # This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
  615. # by immediately canceling the parse operation with a
  616. # {@link ParseCancellationException}. The implementation ensures that the
  617. # {@link ParserRuleContext#exception} field is set for all parse tree nodes
  618. # that were not completed prior to encountering the error.
  619. #
  620. # <p>
  621. # This error strategy is useful in the following scenarios.</p>
  622. #
  623. # <ul>
  624. # <li><strong>Two-stage parsing:</strong> This error strategy allows the first
  625. # stage of two-stage parsing to immediately terminate if an error is
  626. # encountered, and immediately fall back to the second stage. In addition to
  627. # avoiding wasted work by attempting to recover from errors here, the empty
  628. # implementation of {@link BailErrorStrategy#sync} improves the performance of
  629. # the first stage.</li>
  630. # <li><strong>Silent validation:</strong> When syntax errors are not being
  631. # reported or logged, and the parse result is simply ignored if errors occur,
  632. # the {@link BailErrorStrategy} avoids wasting work on recovering from errors
  633. # when the result will be ignored either way.</li>
  634. # </ul>
  635. #
  636. # <p>
  637. # {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
  638. #
  639. # @see Parser#setErrorHandler(ANTLRErrorStrategy)
  640. #
  641. class BailErrorStrategy(DefaultErrorStrategy):
  642. # Instead of recovering from exception {@code e}, re-throw it wrapped
  643. # in a {@link ParseCancellationException} so it is not caught by the
  644. # rule function catches. Use {@link Exception#getCause()} to get the
  645. # original {@link RecognitionException}.
  646. #
  647. def recover(self, recognizer:Parser, e:RecognitionException):
  648. context = recognizer._ctx
  649. while context is not None:
  650. context.exception = e
  651. context = context.parentCtx
  652. raise ParseCancellationException(e)
  653. # Make sure we don't attempt to recover inline; if the parser
  654. # successfully recovers, it won't throw an exception.
  655. #
  656. def recoverInline(self, recognizer:Parser):
  657. self.recover(recognizer, InputMismatchException(recognizer))
  658. # Make sure we don't attempt to recover from problems in subrules.#
  659. def sync(self, recognizer:Parser):
  660. pass
  661. del Parser