| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580 |
- #
- # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- # Use of this file is governed by the BSD 3-clause license that
- # can be found in the LICENSE.txt file in the project root.
- import sys
- if sys.version_info[1] > 5:
- from typing import TextIO
- else:
- from typing.io import TextIO
- from antlr4.BufferedTokenStream import TokenStream
- from antlr4.CommonTokenFactory import TokenFactory
- from antlr4.error.ErrorStrategy import DefaultErrorStrategy
- from antlr4.InputStream import InputStream
- from antlr4.Recognizer import Recognizer
- from antlr4.RuleContext import RuleContext
- from antlr4.ParserRuleContext import ParserRuleContext
- from antlr4.Token import Token
- from antlr4.Lexer import Lexer
- from antlr4.atn.ATNDeserializer import ATNDeserializer
- from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions
- from antlr4.error.Errors import UnsupportedOperationException, RecognitionException
- from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
- from antlr4.tree.Tree import ParseTreeListener, TerminalNode, ErrorNode
class TraceListener(ParseTreeListener):
    # Parse listener used for quick debugging: it prints one line to the
    # owning parser's output stream for every rule entry, rule exit and
    # consumed terminal. Error nodes are ignored.
    __slots__ = '_parser'

    def __init__(self, parser):
        # The parser whose rule names, token stream and output stream are
        # consulted when a trace line is printed.
        self._parser = parser

    # Print the name of the rule being entered and the text of LT(1).
    def enterEveryRule(self, ctx):
        parser = self._parser
        ruleName = parser.ruleNames[ctx.getRuleIndex()]
        lookahead = parser._input.LT(1).text
        print("enter " + ruleName + ", LT(1)=" + lookahead, file=parser._output)

    # Print the consumed symbol together with the name of the rule the
    # parser is currently in (taken from the parser's current context).
    def visitTerminal(self, node):
        parser = self._parser
        symbolText = str(node.symbol)
        ruleName = parser.ruleNames[parser._ctx.getRuleIndex()]
        print("consume " + symbolText + " rule " + ruleName, file=parser._output)

    # Error nodes are deliberately not traced.
    def visitErrorNode(self, node):
        pass

    # Print the name of the rule being exited and the text of LT(1).
    def exitEveryRule(self, ctx):
        parser = self._parser
        ruleName = parser.ruleNames[ctx.getRuleIndex()]
        lookahead = parser._input.LT(1).text
        print("exit " + ruleName + ", LT(1)=" + lookahead, file=parser._output)
- # self is all the parsing support code essentially; most of it is error recovery stuff.#
class Parser (Recognizer):
    # Parsing support code shared by parsers: token matching and consumption,
    # parse-tree construction, parse-listener dispatch, precedence tracking
    # for left-recursive rules, and assorted debugging helpers.
    __slots__ = (
        '_input', '_output', '_errHandler', '_precedenceStack', '_ctx',
        'buildParseTrees', '_tracer', '_parseListeners', '_syntaxErrors'
    )

    # This field maps from the serialized ATN string to the deserialized
    # {@link ATN} with bypass alternatives. It is a class attribute, so the
    # cache is shared by all parser instances.
    #
    # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions()
    #
    bypassAltsAtnCache = dict()

    # Create a parser reading tokens from {@code input}.
    #
    # @param input the token stream to parse
    # @param output the text stream that receives trace and DFA dump output
    #
    def __init__(self, input:TokenStream, output:TextIO = sys.stdout):
        super().__init__()
        # The input stream.
        self._input = None
        # Destination for the output of TraceListener and dumpDFA.
        self._output = output
        # The error handling strategy for the parser. The default value is a new
        # instance of {@link DefaultErrorStrategy}.
        self._errHandler = DefaultErrorStrategy()
        # Stack of precedence levels; it always starts with a single 0 entry.
        self._precedenceStack = [0]
        # The {@link ParserRuleContext} object for the currently executing rule.
        # This is always non-null during the parsing process.
        self._ctx = None
        # Specifies whether or not the parser should construct a parse tree during
        # the parsing process. The default value is {@code true}.
        self.buildParseTrees = True
        # When {@link #setTrace}{@code (true)} is called, a reference to the
        # {@link TraceListener} is stored here so it can be easily removed in a
        # later call to {@link #setTrace}{@code (false)}. The listener itself is
        # implemented as a parser listener so this field is not directly used by
        # other parser methods.
        self._tracer = None
        # The list of {@link ParseTreeListener} listeners registered to receive
        # events during the parse, or None when there are none.
        self._parseListeners = None
        # The number of syntax errors reported during parsing. This value is
        # incremented each time {@link #notifyErrorListeners} is called.
        self._syntaxErrors = 0
        # Must come last: setInputStream() ends up in reset(), which reads the
        # fields initialised above.
        self.setInputStream(input)

    # Reset the parser's state: rewind the input (if any), reset the error
    # strategy, drop the current context, clear the error count, turn tracing
    # off, restore the precedence stack and reset the interpreter (if any).
    def reset(self):
        if self._input is not None:
            self._input.seek(0)
        self._errHandler.reset(self)
        self._ctx = None
        self._syntaxErrors = 0
        self.setTrace(False)
        self._precedenceStack = [0]
        if self._interp is not None:
            self._interp.reset()

    # Match current input symbol against {@code ttype}. If the symbol type
    # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are
    # called to complete the match process.
    #
    # <p>If the symbol type does not match,
    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
    # strategy to attempt recovery. If {@link #buildParseTrees} is
    # {@code true} and the token index of the symbol returned by
    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
    #
    # @param ttype the token type to match
    # @return the matched symbol
    # @throws RecognitionException if the current input symbol did not match
    # {@code ttype} and the error strategy could not recover from the
    # mismatched symbol
    def match(self, ttype:int):
        t = self.getCurrentToken()
        if t.type==ttype:
            self._errHandler.reportMatch(self)
            self.consume()
        else:
            t = self._errHandler.recoverInline(self)
            if self.buildParseTrees and t.tokenIndex==-1:
                # we must have conjured up a new token during single token insertion
                # if it's not the current symbol
                self._ctx.addErrorNode(t)
        return t

    # Match current input symbol as a wildcard. If the symbol type matches
    # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch}
    # and {@link #consume} are called to complete the match process.
    #
    # <p>If the symbol type does not match,
    # {@link ANTLRErrorStrategy#recoverInline} is called on the current error
    # strategy to attempt recovery. If {@link #buildParseTrees} is
    # {@code true} and the token index of the symbol returned by
    # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
    # the parse tree by calling {@link ParserRuleContext#addErrorNode}.</p>
    #
    # @return the matched symbol
    # @throws RecognitionException if the current input symbol did not match
    # a wildcard and the error strategy could not recover from the mismatched
    # symbol
    def matchWildcard(self):
        t = self.getCurrentToken()
        if t.type > 0:
            self._errHandler.reportMatch(self)
            self.consume()
        else:
            t = self._errHandler.recoverInline(self)
            if self.buildParseTrees and t.tokenIndex == -1:
                # we must have conjured up a new token during single token insertion
                # if it's not the current symbol
                self._ctx.addErrorNode(t)
        return t

    # Return the registered parse listeners. When none are registered a new
    # empty list is returned; otherwise the parser's own list is returned
    # (not a copy).
    def getParseListeners(self):
        return [] if self._parseListeners is None else self._parseListeners

    # Registers {@code listener} to receive events during the parsing process.
    #
    # <p>To support output-preserving grammar transformations (including but not
    # limited to left-recursion removal, automated left-factoring, and
    # optimized code generation), calls to listener methods during the parse
    # may differ substantially from calls made by
    # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
    # particular, rule entry and exit events may occur in a different order
    # during the parse than after the parser. In addition, calls to certain
    # rule entry methods may be omitted.</p>
    #
    # <p>With the following specific exceptions, calls to listener events are
    # <em>deterministic</em>, i.e. for identical input the calls to listener
    # methods will be the same.</p>
    #
    # <ul>
    # <li>Alterations to the grammar used to generate code may change the
    # behavior of the listener calls.</li>
    # <li>Alterations to the command line options passed to ANTLR 4 when
    # generating the parser may change the behavior of the listener calls.</li>
    # <li>Changing the version of the ANTLR Tool used to generate the parser
    # may change the behavior of the listener calls.</li>
    # </ul>
    #
    # @param listener the listener to add
    #
    # @throws ReferenceError if {@code listener} is None
    #
    def addParseListener(self, listener:ParseTreeListener):
        if listener is None:
            raise ReferenceError("listener")
        if self._parseListeners is None:
            self._parseListeners = []
        self._parseListeners.append(listener)

    #
    # Remove {@code listener} from the list of parse listeners.
    #
    # <p>If {@code listener} is None or has not been added as a parse
    # listener, this method does nothing.</p>
    # @param listener the listener to remove
    #
    def removeParseListener(self, listener:ParseTreeListener):
        listeners = self._parseListeners
        if listener is None or listeners is None:
            return
        # list.remove() raises ValueError for a missing element, so check
        # membership first to honour the "does nothing" contract above.
        if listener in listeners:
            listeners.remove(listener)
            if not listeners:
                self._parseListeners = None

    # Remove all parse listeners.
    def removeParseListeners(self):
        self._parseListeners = None

    # Notify any parse listeners of an enter rule event. For each listener
    # the generic enterEveryRule callback runs before the context's own
    # enterRule dispatch.
    def triggerEnterRuleEvent(self):
        if self._parseListeners is not None:
            for listener in self._parseListeners:
                listener.enterEveryRule(self._ctx)
                self._ctx.enterRule(listener)

    #
    # Notify any parse listeners of an exit rule event. Listeners are visited
    # in reverse registration order, and the context's own exitRule dispatch
    # runs before the generic exitEveryRule callback.
    #
    # @see #addParseListener
    #
    def triggerExitRuleEvent(self):
        if self._parseListeners is not None:
            # reverse order walk of listeners
            for listener in reversed(self._parseListeners):
                self._ctx.exitRule(listener)
                listener.exitEveryRule(self._ctx)

    # Gets the number of syntax errors reported during parsing. This value is
    # incremented each time {@link #notifyErrorListeners} is called.
    #
    # @see #notifyErrorListeners
    #
    def getNumberOfSyntaxErrors(self):
        return self._syntaxErrors

    # Return the token factory of the input stream's token source.
    def getTokenFactory(self):
        return self._input.tokenSource._factory

    # Tell our token source about a new way to create tokens.
    def setTokenFactory(self, factory:TokenFactory):
        self._input.tokenSource._factory = factory

    # The ATN with bypass alternatives is expensive to create so we create it
    # lazily and keep it in {@link #bypassAltsAtnCache}, keyed by the
    # serialized ATN.
    #
    # @throws UnsupportedOperationException if the current parser does not
    # implement the {@link #getSerializedATN()} method.
    #
    def getATNWithBypassAlts(self):
        serializedAtn = self.getSerializedATN()
        if serializedAtn is None:
            raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.")
        result = self.bypassAltsAtnCache.get(serializedAtn, None)
        if result is None:
            deserializationOptions = ATNDeserializationOptions()
            deserializationOptions.generateRuleBypassTransitions = True
            result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn)
            self.bypassAltsAtnCache[serializedAtn] = result
        return result

    # The preferred method of getting a tree pattern. For example, here's a
    # sample use:
    #
    # <pre>
    # ParseTree t = parser.expr();
    # ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
    # ParseTreeMatch m = p.match(t);
    # String id = m.get("ID");
    # </pre>
    #
    # When {@code lexer} is not given, the token source of the current token
    # stream is used, provided it is a {@link Lexer}.
    #
    # @throws UnsupportedOperationException if no lexer was given and none
    # could be discovered
    #
    def compileParseTreePattern(self, pattern:str, patternRuleIndex:int, lexer:Lexer = None):
        if lexer is None:
            tokenStream = self.getTokenStream()
            if tokenStream is not None:
                tokenSource = tokenStream.tokenSource
                if isinstance(tokenSource, Lexer):
                    lexer = tokenSource
        if lexer is None:
            raise UnsupportedOperationException("Parser can't discover a lexer to use")
        m = ParseTreePatternMatcher(lexer, self)
        return m.compile(pattern, patternRuleIndex)

    # Same as {@link #getTokenStream}.
    def getInputStream(self):
        return self.getTokenStream()

    # Same as {@link #setTokenStream}.
    def setInputStream(self, input:InputStream):
        self.setTokenStream(input)

    # Return the token stream the parser reads from.
    def getTokenStream(self):
        return self._input

    # Set the token stream and reset the parser.
    def setTokenStream(self, input:TokenStream):
        # Clear the stream first so that reset() does not seek on the old one.
        self._input = None
        self.reset()
        self._input = input

    # Match needs to return the current input symbol, which gets put
    # into the label for the associated token ref; e.g., x=ID.
    #
    def getCurrentToken(self):
        return self._input.LT(1)

    # Count a syntax error and forward it to the error listener dispatch.
    #
    # @param msg the error message
    # @param offendingToken the token at which the error occurred; defaults to
    # the current token
    # @param e the recognition exception associated with the error, if any
    #
    def notifyErrorListeners(self, msg:str, offendingToken:Token = None, e:RecognitionException = None):
        if offendingToken is None:
            offendingToken = self.getCurrentToken()
        self._syntaxErrors += 1
        line = offendingToken.line
        column = offendingToken.column
        listener = self.getErrorListenerDispatch()
        listener.syntaxError(self, offendingToken, line, column, msg, e)

    #
    # Consume and return the {@linkplain #getCurrentToken current symbol}.
    #
    # <p>E.g., given the following input with {@code A} being the current
    # lookahead symbol, this function moves the cursor to {@code B} and returns
    # {@code A}.</p>
    #
    # <pre>
    # A B
    # ^
    # </pre>
    #
    # If the parser is not in error recovery mode, the consumed symbol is added
    # to the parse tree using {@link ParserRuleContext#addTokenNode}, and
    # {@link ParseTreeListener#visitTerminal} is called on any parse listeners.
    # If the parser <em>is</em> in error recovery mode, the consumed symbol is
    # added to the parse tree using
    # {@link ParserRuleContext#addErrorNode(Token)}, and
    # {@link ParseTreeListener#visitErrorNode} is called on any parse
    # listeners.
    #
    def consume(self):
        o = self.getCurrentToken()
        # EOF is returned but never consumed from the stream.
        if o.type != Token.EOF:
            self.getInputStream().consume()
        hasListener = self._parseListeners is not None and len(self._parseListeners)>0
        if self.buildParseTrees or hasListener:
            if self._errHandler.inErrorRecoveryMode(self):
                node = self._ctx.addErrorNode(o)
            else:
                node = self._ctx.addTokenNode(o)
            if hasListener:
                for listener in self._parseListeners:
                    if isinstance(node, ErrorNode):
                        listener.visitErrorNode(node)
                    elif isinstance(node, TerminalNode):
                        listener.visitTerminal(node)
        return o

    # Attach the current context to its parent's children, if it has a parent.
    def addContextToParseTree(self):
        # add current context to parent if we have a parent
        if self._ctx.parentCtx is not None:
            self._ctx.parentCtx.addChild(self._ctx)

    # Always called by generated parsers upon entry to a rule. Access field
    # {@link #_ctx} get the current context.
    #
    # @param localctx the context of the rule being entered
    # @param state the ATN state number to record as the parser state
    # @param ruleIndex the index of the rule being entered (not used here)
    #
    def enterRule(self, localctx:ParserRuleContext , state:int , ruleIndex:int):
        self.state = state
        self._ctx = localctx
        self._ctx.start = self._input.LT(1)
        if self.buildParseTrees:
            self.addContextToParseTree()
        if self._parseListeners is not None:
            self.triggerEnterRuleEvent()

    # Leave the current rule: record its stop token, fire exit events, then
    # restore the parser state and context of the invoking rule.
    def exitRule(self):
        self._ctx.stop = self._input.LT(-1)
        # trigger event on _ctx, before it reverts to parent
        if self._parseListeners is not None:
            self.triggerExitRuleEvent()
        self.state = self._ctx.invokingState
        self._ctx = self._ctx.parentCtx

    # Record the alternative number on {@code localctx} and make it the
    # current context.
    def enterOuterAlt(self, localctx:ParserRuleContext, altNum:int):
        localctx.setAltNumber(altNum)
        # if we have new localctx, make sure we replace existing ctx
        # that is previous child of parse tree
        if self.buildParseTrees and self._ctx != localctx:
            if self._ctx.parentCtx is not None:
                self._ctx.parentCtx.removeLastChild()
                self._ctx.parentCtx.addChild(localctx)
        self._ctx = localctx

    # Get the precedence level for the top-most precedence rule.
    #
    # @return The precedence level for the top-most precedence rule, or -1 if
    # the parser context is not nested within a precedence rule.
    #
    def getPrecedence(self):
        if not self._precedenceStack:
            return -1
        return self._precedenceStack[-1]

    # Enter a left-recursive rule: push {@code precedence}, make
    # {@code localctx} the current context and fire enter events. Unlike
    # {@link #enterRule}, the context is not added to the parse tree here.
    def enterRecursionRule(self, localctx:ParserRuleContext, state:int, ruleIndex:int, precedence:int):
        self.state = state
        self._precedenceStack.append(precedence)
        self._ctx = localctx
        self._ctx.start = self._input.LT(1)
        if self._parseListeners is not None:
            self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules

    #
    # Like {@link #enterRule} but for recursive rules: the current context
    # becomes a child of {@code localctx}, which becomes the current context
    # and inherits the previous context's start token.
    #
    def pushNewRecursionContext(self, localctx:ParserRuleContext, state:int, ruleIndex:int):
        previous = self._ctx
        previous.parentCtx = localctx
        previous.invokingState = state
        previous.stop = self._input.LT(-1)

        self._ctx = localctx
        self._ctx.start = previous.start
        if self.buildParseTrees:
            self._ctx.addChild(previous)

        if self._parseListeners is not None:
            self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules

    # Finish a left-recursive rule: pop its precedence, fire exit events for
    # every context up to {@code parentCtx}, and hook the resulting context
    # into {@code parentCtx}.
    def unrollRecursionContexts(self, parentCtx:ParserRuleContext):
        self._precedenceStack.pop()
        self._ctx.stop = self._input.LT(-1)
        retCtx = self._ctx # save current ctx (return value)
        # unroll so _ctx is as it was before call to recursive method
        if self._parseListeners is not None:
            while self._ctx is not parentCtx:
                self.triggerExitRuleEvent()
                self._ctx = self._ctx.parentCtx
        else:
            self._ctx = parentCtx
        # hook into tree
        retCtx.parentCtx = parentCtx

        if self.buildParseTrees and parentCtx is not None:
            # add return ctx into invoking rule's tree
            parentCtx.addChild(retCtx)

    # Walk up from the current context and return the nearest context whose
    # rule index equals {@code ruleIndex}, or None if there is none.
    def getInvokingContext(self, ruleIndex:int):
        ctx = self._ctx
        while ctx is not None:
            if ctx.getRuleIndex() == ruleIndex:
                return ctx
            ctx = ctx.parentCtx
        return None

    # Precedence predicate: true when {@code precedence} is at least the
    # precedence on top of the precedence stack. {@code localctx} is unused.
    def precpred(self, localctx:RuleContext , precedence:int):
        return precedence >= self._precedenceStack[-1]

    # Not implemented; always returns False.
    def inContext(self, context:str):
        # TODO: useful in parser?
        return False

    #
    # Checks whether or not {@code symbol} can follow the current state in the
    # ATN. The behavior of this method is equivalent to the following, but is
    # implemented such that the complete context-sensitive follow set does not
    # need to be explicitly constructed.
    #
    # <pre>
    # return getExpectedTokens().contains(symbol);
    # </pre>
    #
    # @param symbol the symbol type to check
    # @return {@code true} if {@code symbol} can follow the current state in
    # the ATN, otherwise {@code false}.
    #
    def isExpectedToken(self, symbol:int):
        atn = self._interp.atn
        ctx = self._ctx
        s = atn.states[self.state]
        following = atn.nextTokens(s)
        if symbol in following:
            return True
        if Token.EPSILON not in following:
            return False

        # EPSILON in the follow set means the rule may end here, so keep
        # checking what can follow in each invoking rule up the context chain.
        while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following:
            invokingState = atn.states[ctx.invokingState]
            rt = invokingState.transitions[0]
            following = atn.nextTokens(rt.followState)
            if symbol in following:
                return True
            ctx = ctx.parentCtx

        # Only EOF can still match once the walk up the context chain ends.
        return Token.EPSILON in following and symbol == Token.EOF

    # Computes the set of input symbols which could follow the current parser
    # state and context, as given by {@link #state} and {@link #_ctx},
    # respectively.
    #
    # @see ATN#getExpectedTokens(int, RuleContext)
    #
    def getExpectedTokens(self):
        return self._interp.atn.getExpectedTokens(self.state, self._ctx)

    # Return the ATN's next-token set for the current state, without taking
    # the invoking contexts into account.
    def getExpectedTokensWithinCurrentRule(self):
        atn = self._interp.atn
        s = atn.states[self.state]
        return atn.nextTokens(s)

    # Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.
    def getRuleIndex(self, ruleName:str):
        return self.getRuleIndexMap().get(ruleName, -1)

    # Return List<String> of the rule names in your parser instance
    # leading up to a call to the current rule. You could override if
    # you want more details such as the file/line info of where
    # in the ATN a rule is invoked.
    #
    # This is very useful for error messages. The list starts with the rule
    # of {@code p} (default: the current context) and ends with the outermost
    # rule; contexts with a negative rule index are reported as "n/a".
    #
    def getRuleInvocationStack(self, p:RuleContext=None):
        if p is None:
            p = self._ctx
        stack = []
        while p is not None:
            # compute what follows who invoked us
            ruleIndex = p.getRuleIndex()
            if ruleIndex<0:
                stack.append("n/a")
            else:
                stack.append(self.ruleNames[ruleIndex])
            p = p.parentCtx
        return stack

    # For debugging and other purposes: the string form of every decision DFA.
    def getDFAStrings(self):
        return [ str(dfa) for dfa in self._interp.decisionToDFA]

    # For debugging and other purposes: print every non-empty decision DFA to
    # the parser's output stream, separated by blank lines.
    def dumpDFA(self):
        seenOne = False
        for dfa in self._interp.decisionToDFA:
            if len(dfa.states)>0:
                if seenOne:
                    print(file=self._output)
                print("Decision " + str(dfa.decision) + ":", file=self._output)
                print(dfa.toString(self.literalNames, self.symbolicNames), end='', file=self._output)
                seenOne = True

    # Return the source name of the input stream.
    def getSourceName(self):
        return self._input.sourceName

    # During a parse is sometimes useful to listen in on the rule entry and exit
    # events as well as token matches. This is for quick and dirty debugging.
    #
    # @param trace True to install a fresh {@link TraceListener} (replacing
    # any existing one), False to remove the current one
    #
    def setTrace(self, trace:bool):
        # Drop the previous tracer, if any. Only a real tracer is ever passed
        # to removeParseListener, so turning tracing off while none is
        # installed leaves the other parse listeners untouched.
        if self._tracer is not None:
            self.removeParseListener(self._tracer)
            self._tracer = None
        if trace:
            self._tracer = TraceListener(self)
            self.addParseListener(self._tracer)
|