Parser.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. #
  2. # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  3. # Use of this file is governed by the BSD 3-clause license that
  4. # can be found in the LICENSE.txt file in the project root.
  5. import sys
  6. if sys.version_info[1] > 5:
  7. from typing import TextIO
  8. else:
  9. from typing.io import TextIO
  10. from antlr4.BufferedTokenStream import TokenStream
  11. from antlr4.CommonTokenFactory import TokenFactory
  12. from antlr4.error.ErrorStrategy import DefaultErrorStrategy
  13. from antlr4.InputStream import InputStream
  14. from antlr4.Recognizer import Recognizer
  15. from antlr4.RuleContext import RuleContext
  16. from antlr4.ParserRuleContext import ParserRuleContext
  17. from antlr4.Token import Token
  18. from antlr4.Lexer import Lexer
  19. from antlr4.atn.ATNDeserializer import ATNDeserializer
  20. from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions
  21. from antlr4.error.Errors import UnsupportedOperationException, RecognitionException
  22. from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
  23. from antlr4.tree.Tree import ParseTreeListener, TerminalNode, ErrorNode
  24. class TraceListener(ParseTreeListener):
  25. __slots__ = '_parser'
  26. def __init__(self, parser):
  27. self._parser = parser
  28. def enterEveryRule(self, ctx):
  29. print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output)
  30. def visitTerminal(self, node):
  31. print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()], file=self._parser._output)
  32. def visitErrorNode(self, node):
  33. pass
  34. def exitEveryRule(self, ctx):
  35. print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output)
  36. # self is all the parsing support code essentially; most of it is error recovery stuff.#
  37. class Parser (Recognizer):
  38. __slots__ = (
  39. '_input', '_output', '_errHandler', '_precedenceStack', '_ctx',
  40. 'buildParseTrees', '_tracer', '_parseListeners', '_syntaxErrors'
  41. )
  42. # self field maps from the serialized ATN string to the deserialized {@link ATN} with
  43. # bypass alternatives.
  44. #
  45. # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions()
  46. #
  47. bypassAltsAtnCache = dict()
  48. def __init__(self, input:TokenStream, output:TextIO = sys.stdout):
  49. super().__init__()
  50. # The input stream.
  51. self._input = None
  52. self._output = output
  53. # The error handling strategy for the parser. The default value is a new
  54. # instance of {@link DefaultErrorStrategy}.
  55. self._errHandler = DefaultErrorStrategy()
  56. self._precedenceStack = list()
  57. self._precedenceStack.append(0)
  58. # The {@link ParserRuleContext} object for the currently executing rule.
  59. # self is always non-null during the parsing process.
  60. self._ctx = None
  61. # Specifies whether or not the parser should construct a parse tree during
  62. # the parsing process. The default value is {@code true}.
  63. self.buildParseTrees = True
  64. # When {@link #setTrace}{@code (true)} is called, a reference to the
  65. # {@link TraceListener} is stored here so it can be easily removed in a
  66. # later call to {@link #setTrace}{@code (false)}. The listener itself is
  67. # implemented as a parser listener so self field is not directly used by
  68. # other parser methods.
  69. self._tracer = None
  70. # The list of {@link ParseTreeListener} listeners registered to receive
  71. # events during the parse.
  72. self._parseListeners = None
  73. # The number of syntax errors reported during parsing. self value is
  74. # incremented each time {@link #notifyErrorListeners} is called.
  75. self._syntaxErrors = 0
  76. self.setInputStream(input)
  77. # reset the parser's state#
  78. def reset(self):
  79. if self._input is not None:
  80. self._input.seek(0)
  81. self._errHandler.reset(self)
  82. self._ctx = None
  83. self._syntaxErrors = 0
  84. self.setTrace(False)
  85. self._precedenceStack = list()
  86. self._precedenceStack.append(0)
  87. if self._interp is not None:
  88. self._interp.reset()
  89. # Match current input symbol against {@code ttype}. If the symbol type
  90. # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are
  91. # called to complete the match process.
  92. #
  93. # <p>If the symbol type does not match,
  94. # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
  95. # strategy to attempt recovery. If {@link #getBuildParseTree} is
  96. # {@code true} and the token index of the symbol returned by
  97. # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
  98. # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
  99. #
  100. # @param ttype the token type to match
  101. # @return the matched symbol
  102. # @throws RecognitionException if the current input symbol did not match
  103. # {@code ttype} and the error strategy could not recover from the
  104. # mismatched symbol
  105. def match(self, ttype:int):
  106. t = self.getCurrentToken()
  107. if t.type==ttype:
  108. self._errHandler.reportMatch(self)
  109. self.consume()
  110. else:
  111. t = self._errHandler.recoverInline(self)
  112. if self.buildParseTrees and t.tokenIndex==-1:
  113. # we must have conjured up a new token during single token insertion
  114. # if it's not the current symbol
  115. self._ctx.addErrorNode(t)
  116. return t
  117. # Match current input symbol as a wildcard. If the symbol type matches
  118. # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch}
  119. # and {@link #consume} are called to complete the match process.
  120. #
  121. # <p>If the symbol type does not match,
  122. # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
  123. # strategy to attempt recovery. If {@link #getBuildParseTree} is
  124. # {@code true} and the token index of the symbol returned by
  125. # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
  126. # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
  127. #
  128. # @return the matched symbol
  129. # @throws RecognitionException if the current input symbol did not match
  130. # a wildcard and the error strategy could not recover from the mismatched
  131. # symbol
  132. def matchWildcard(self):
  133. t = self.getCurrentToken()
  134. if t.type > 0:
  135. self._errHandler.reportMatch(self)
  136. self.consume()
  137. else:
  138. t = self._errHandler.recoverInline(self)
  139. if self.buildParseTrees and t.tokenIndex == -1:
  140. # we must have conjured up a new token during single token insertion
  141. # if it's not the current symbol
  142. self._ctx.addErrorNode(t)
  143. return t
  144. def getParseListeners(self):
  145. return list() if self._parseListeners is None else self._parseListeners
  146. # Registers {@code listener} to receive events during the parsing process.
  147. #
  148. # <p>To support output-preserving grammar transformations (including but not
  149. # limited to left-recursion removal, automated left-factoring, and
  150. # optimized code generation), calls to listener methods during the parse
  151. # may differ substantially from calls made by
  152. # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
  153. # particular, rule entry and exit events may occur in a different order
  154. # during the parse than after the parser. In addition, calls to certain
  155. # rule entry methods may be omitted.</p>
  156. #
  157. # <p>With the following specific exceptions, calls to listener events are
  158. # <em>deterministic</em>, i.e. for identical input the calls to listener
  159. # methods will be the same.</p>
  160. #
  161. # <ul>
  162. # <li>Alterations to the grammar used to generate code may change the
  163. # behavior of the listener calls.</li>
  164. # <li>Alterations to the command line options passed to ANTLR 4 when
  165. # generating the parser may change the behavior of the listener calls.</li>
  166. # <li>Changing the version of the ANTLR Tool used to generate the parser
  167. # may change the behavior of the listener calls.</li>
  168. # </ul>
  169. #
  170. # @param listener the listener to add
  171. #
  172. # @throws NullPointerException if {@code} listener is {@code null}
  173. #
  174. def addParseListener(self, listener:ParseTreeListener):
  175. if listener is None:
  176. raise ReferenceError("listener")
  177. if self._parseListeners is None:
  178. self._parseListeners = []
  179. self._parseListeners.append(listener)
  180. #
  181. # Remove {@code listener} from the list of parse listeners.
  182. #
  183. # <p>If {@code listener} is {@code null} or has not been added as a parse
  184. # listener, self method does nothing.</p>
  185. # @param listener the listener to remove
  186. #
  187. def removeParseListener(self, listener:ParseTreeListener):
  188. if self._parseListeners is not None:
  189. self._parseListeners.remove(listener)
  190. if len(self._parseListeners)==0:
  191. self._parseListeners = None
  192. # Remove all parse listeners.
  193. def removeParseListeners(self):
  194. self._parseListeners = None
  195. # Notify any parse listeners of an enter rule event.
  196. def triggerEnterRuleEvent(self):
  197. if self._parseListeners is not None:
  198. for listener in self._parseListeners:
  199. listener.enterEveryRule(self._ctx)
  200. self._ctx.enterRule(listener)
  201. #
  202. # Notify any parse listeners of an exit rule event.
  203. #
  204. # @see #addParseListener
  205. #
  206. def triggerExitRuleEvent(self):
  207. if self._parseListeners is not None:
  208. # reverse order walk of listeners
  209. for listener in reversed(self._parseListeners):
  210. self._ctx.exitRule(listener)
  211. listener.exitEveryRule(self._ctx)
  212. # Gets the number of syntax errors reported during parsing. This value is
  213. # incremented each time {@link #notifyErrorListeners} is called.
  214. #
  215. # @see #notifyErrorListeners
  216. #
  217. def getNumberOfSyntaxErrors(self):
  218. return self._syntaxErrors
  219. def getTokenFactory(self):
  220. return self._input.tokenSource._factory
  221. # Tell our token source and error strategy about a new way to create tokens.#
  222. def setTokenFactory(self, factory:TokenFactory):
  223. self._input.tokenSource._factory = factory
  224. # The ATN with bypass alternatives is expensive to create so we create it
  225. # lazily.
  226. #
  227. # @throws UnsupportedOperationException if the current parser does not
  228. # implement the {@link #getSerializedATN()} method.
  229. #
  230. def getATNWithBypassAlts(self):
  231. serializedAtn = self.getSerializedATN()
  232. if serializedAtn is None:
  233. raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.")
  234. result = self.bypassAltsAtnCache.get(serializedAtn, None)
  235. if result is None:
  236. deserializationOptions = ATNDeserializationOptions()
  237. deserializationOptions.generateRuleBypassTransitions = True
  238. result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn)
  239. self.bypassAltsAtnCache[serializedAtn] = result
  240. return result
  241. # The preferred method of getting a tree pattern. For example, here's a
  242. # sample use:
  243. #
  244. # <pre>
  245. # ParseTree t = parser.expr();
  246. # ParseTreePattern p = parser.compileParseTreePattern("&lt;ID&gt;+0", MyParser.RULE_expr);
  247. # ParseTreeMatch m = p.match(t);
  248. # String id = m.get("ID");
  249. # </pre>
  250. #
  251. def compileParseTreePattern(self, pattern:str, patternRuleIndex:int, lexer:Lexer = None):
  252. if lexer is None:
  253. if self.getTokenStream() is not None:
  254. tokenSource = self.getTokenStream().tokenSource
  255. if isinstance( tokenSource, Lexer ):
  256. lexer = tokenSource
  257. if lexer is None:
  258. raise UnsupportedOperationException("Parser can't discover a lexer to use")
  259. m = ParseTreePatternMatcher(lexer, self)
  260. return m.compile(pattern, patternRuleIndex)
  261. def getInputStream(self):
  262. return self.getTokenStream()
  263. def setInputStream(self, input:InputStream):
  264. self.setTokenStream(input)
  265. def getTokenStream(self):
  266. return self._input
  267. # Set the token stream and reset the parser.#
  268. def setTokenStream(self, input:TokenStream):
  269. self._input = None
  270. self.reset()
  271. self._input = input
  272. # Match needs to return the current input symbol, which gets put
  273. # into the label for the associated token ref; e.g., x=ID.
  274. #
  275. def getCurrentToken(self):
  276. return self._input.LT(1)
  277. def notifyErrorListeners(self, msg:str, offendingToken:Token = None, e:RecognitionException = None):
  278. if offendingToken is None:
  279. offendingToken = self.getCurrentToken()
  280. self._syntaxErrors += 1
  281. line = offendingToken.line
  282. column = offendingToken.column
  283. listener = self.getErrorListenerDispatch()
  284. listener.syntaxError(self, offendingToken, line, column, msg, e)
  285. #
  286. # Consume and return the {@linkplain #getCurrentToken current symbol}.
  287. #
  288. # <p>E.g., given the following input with {@code A} being the current
  289. # lookahead symbol, self function moves the cursor to {@code B} and returns
  290. # {@code A}.</p>
  291. #
  292. # <pre>
  293. # A B
  294. # ^
  295. # </pre>
  296. #
  297. # If the parser is not in error recovery mode, the consumed symbol is added
  298. # to the parse tree using {@link ParserRuleContext#addChild(Token)}, and
  299. # {@link ParseTreeListener#visitTerminal} is called on any parse listeners.
  300. # If the parser <em>is</em> in error recovery mode, the consumed symbol is
  301. # added to the parse tree using
  302. # {@link ParserRuleContext#addErrorNode(Token)}, and
  303. # {@link ParseTreeListener#visitErrorNode} is called on any parse
  304. # listeners.
  305. #
  306. def consume(self):
  307. o = self.getCurrentToken()
  308. if o.type != Token.EOF:
  309. self.getInputStream().consume()
  310. hasListener = self._parseListeners is not None and len(self._parseListeners)>0
  311. if self.buildParseTrees or hasListener:
  312. if self._errHandler.inErrorRecoveryMode(self):
  313. node = self._ctx.addErrorNode(o)
  314. else:
  315. node = self._ctx.addTokenNode(o)
  316. if hasListener:
  317. for listener in self._parseListeners:
  318. if isinstance(node, ErrorNode):
  319. listener.visitErrorNode(node)
  320. elif isinstance(node, TerminalNode):
  321. listener.visitTerminal(node)
  322. return o
  323. def addContextToParseTree(self):
  324. # add current context to parent if we have a parent
  325. if self._ctx.parentCtx is not None:
  326. self._ctx.parentCtx.addChild(self._ctx)
  327. # Always called by generated parsers upon entry to a rule. Access field
  328. # {@link #_ctx} get the current context.
  329. #
  330. def enterRule(self, localctx:ParserRuleContext , state:int , ruleIndex:int):
  331. self.state = state
  332. self._ctx = localctx
  333. self._ctx.start = self._input.LT(1)
  334. if self.buildParseTrees:
  335. self.addContextToParseTree()
  336. if self._parseListeners is not None:
  337. self.triggerEnterRuleEvent()
  338. def exitRule(self):
  339. self._ctx.stop = self._input.LT(-1)
  340. # trigger event on _ctx, before it reverts to parent
  341. if self._parseListeners is not None:
  342. self.triggerExitRuleEvent()
  343. self.state = self._ctx.invokingState
  344. self._ctx = self._ctx.parentCtx
  345. def enterOuterAlt(self, localctx:ParserRuleContext, altNum:int):
  346. localctx.setAltNumber(altNum)
  347. # if we have new localctx, make sure we replace existing ctx
  348. # that is previous child of parse tree
  349. if self.buildParseTrees and self._ctx != localctx:
  350. if self._ctx.parentCtx is not None:
  351. self._ctx.parentCtx.removeLastChild()
  352. self._ctx.parentCtx.addChild(localctx)
  353. self._ctx = localctx
  354. # Get the precedence level for the top-most precedence rule.
  355. #
  356. # @return The precedence level for the top-most precedence rule, or -1 if
  357. # the parser context is not nested within a precedence rule.
  358. #
  359. def getPrecedence(self):
  360. if len(self._precedenceStack)==0:
  361. return -1
  362. else:
  363. return self._precedenceStack[-1]
  364. def enterRecursionRule(self, localctx:ParserRuleContext, state:int, ruleIndex:int, precedence:int):
  365. self.state = state
  366. self._precedenceStack.append(precedence)
  367. self._ctx = localctx
  368. self._ctx.start = self._input.LT(1)
  369. if self._parseListeners is not None:
  370. self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
  371. #
  372. # Like {@link #enterRule} but for recursive rules.
  373. #
  374. def pushNewRecursionContext(self, localctx:ParserRuleContext, state:int, ruleIndex:int):
  375. previous = self._ctx
  376. previous.parentCtx = localctx
  377. previous.invokingState = state
  378. previous.stop = self._input.LT(-1)
  379. self._ctx = localctx
  380. self._ctx.start = previous.start
  381. if self.buildParseTrees:
  382. self._ctx.addChild(previous)
  383. if self._parseListeners is not None:
  384. self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
  385. def unrollRecursionContexts(self, parentCtx:ParserRuleContext):
  386. self._precedenceStack.pop()
  387. self._ctx.stop = self._input.LT(-1)
  388. retCtx = self._ctx # save current ctx (return value)
  389. # unroll so _ctx is as it was before call to recursive method
  390. if self._parseListeners is not None:
  391. while self._ctx is not parentCtx:
  392. self.triggerExitRuleEvent()
  393. self._ctx = self._ctx.parentCtx
  394. else:
  395. self._ctx = parentCtx
  396. # hook into tree
  397. retCtx.parentCtx = parentCtx
  398. if self.buildParseTrees and parentCtx is not None:
  399. # add return ctx into invoking rule's tree
  400. parentCtx.addChild(retCtx)
  401. def getInvokingContext(self, ruleIndex:int):
  402. ctx = self._ctx
  403. while ctx is not None:
  404. if ctx.getRuleIndex() == ruleIndex:
  405. return ctx
  406. ctx = ctx.parentCtx
  407. return None
  408. def precpred(self, localctx:RuleContext , precedence:int):
  409. return precedence >= self._precedenceStack[-1]
  410. def inContext(self, context:str):
  411. # TODO: useful in parser?
  412. return False
  413. #
  414. # Checks whether or not {@code symbol} can follow the current state in the
  415. # ATN. The behavior of self method is equivalent to the following, but is
  416. # implemented such that the complete context-sensitive follow set does not
  417. # need to be explicitly constructed.
  418. #
  419. # <pre>
  420. # return getExpectedTokens().contains(symbol);
  421. # </pre>
  422. #
  423. # @param symbol the symbol type to check
  424. # @return {@code true} if {@code symbol} can follow the current state in
  425. # the ATN, otherwise {@code false}.
  426. #
  427. def isExpectedToken(self, symbol:int):
  428. atn = self._interp.atn
  429. ctx = self._ctx
  430. s = atn.states[self.state]
  431. following = atn.nextTokens(s)
  432. if symbol in following:
  433. return True
  434. if not Token.EPSILON in following:
  435. return False
  436. while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following:
  437. invokingState = atn.states[ctx.invokingState]
  438. rt = invokingState.transitions[0]
  439. following = atn.nextTokens(rt.followState)
  440. if symbol in following:
  441. return True
  442. ctx = ctx.parentCtx
  443. if Token.EPSILON in following and symbol == Token.EOF:
  444. return True
  445. else:
  446. return False
  447. # Computes the set of input symbols which could follow the current parser
  448. # state and context, as given by {@link #getState} and {@link #getContext},
  449. # respectively.
  450. #
  451. # @see ATN#getExpectedTokens(int, RuleContext)
  452. #
  453. def getExpectedTokens(self):
  454. return self._interp.atn.getExpectedTokens(self.state, self._ctx)
  455. def getExpectedTokensWithinCurrentRule(self):
  456. atn = self._interp.atn
  457. s = atn.states[self.state]
  458. return atn.nextTokens(s)
  459. # Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.#
  460. def getRuleIndex(self, ruleName:str):
  461. ruleIndex = self.getRuleIndexMap().get(ruleName, None)
  462. if ruleIndex is not None:
  463. return ruleIndex
  464. else:
  465. return -1
  466. # Return List&lt;String&gt; of the rule names in your parser instance
  467. # leading up to a call to the current rule. You could override if
  468. # you want more details such as the file/line info of where
  469. # in the ATN a rule is invoked.
  470. #
  471. # this is very useful for error messages.
  472. #
  473. def getRuleInvocationStack(self, p:RuleContext=None):
  474. if p is None:
  475. p = self._ctx
  476. stack = list()
  477. while p is not None:
  478. # compute what follows who invoked us
  479. ruleIndex = p.getRuleIndex()
  480. if ruleIndex<0:
  481. stack.append("n/a")
  482. else:
  483. stack.append(self.ruleNames[ruleIndex])
  484. p = p.parentCtx
  485. return stack
  486. # For debugging and other purposes.#
  487. def getDFAStrings(self):
  488. return [ str(dfa) for dfa in self._interp.decisionToDFA]
  489. # For debugging and other purposes.#
  490. def dumpDFA(self):
  491. seenOne = False
  492. for i in range(0, len(self._interp.decisionToDFA)):
  493. dfa = self._interp.decisionToDFA[i]
  494. if len(dfa.states)>0:
  495. if seenOne:
  496. print(file=self._output)
  497. print("Decision " + str(dfa.decision) + ":", file=self._output)
  498. print(dfa.toString(self.literalNames, self.symbolicNames), end='', file=self._output)
  499. seenOne = True
  500. def getSourceName(self):
  501. return self._input.sourceName
  502. # During a parse is sometimes useful to listen in on the rule entry and exit
  503. # events as well as token matches. self is for quick and dirty debugging.
  504. #
  505. def setTrace(self, trace:bool):
  506. if not trace:
  507. self.removeParseListener(self._tracer)
  508. self._tracer = None
  509. else:
  510. if self._tracer is not None:
  511. self.removeParseListener(self._tracer)
  512. self._tracer = TraceListener(self)
  513. self.addParseListener(self._tracer)