| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352 |
- #
- # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- # Use of this file is governed by the BSD 3-clause license that
- # can be found in the LICENSE.txt file in the project root.
- #
- #
- # Represent a subset of XPath XML path syntax for use in identifying nodes in
- # parse trees.
- #
- # <p>
- # Split path into words and separators {@code /} and {@code //} via ANTLR
- # itself then walk path elements from left to right. At each separator-word
- # pair, find set of nodes. Next stage uses those as work list.</p>
- #
- # <p>
- # The basic interface is
- # {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
- # But that is just shorthand for:</p>
- #
- # <pre>
- # p = XPath(parser, pathString)
- # return p.evaluate(tree)
- # </pre>
- #
- # <p>
- # See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
- # allows operators:</p>
- #
- # <dl>
- # <dt>/</dt> <dd>root</dd>
- # <dt>//</dt> <dd>anywhere</dd>
- # <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
- # operator</dd>
- # </dl>
- #
- # <p>
- # and path elements:</p>
- #
- # <dl>
- # <dt>ID</dt> <dd>token name</dd>
- # <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
- # <dt>expr</dt> <dd>rule name</dd>
- # <dt>*</dt> <dd>wildcard matching any node</dd>
- # </dl>
- #
- # <p>
- # Whitespace is not allowed.</p>
- #
- from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode
- from antlr4.InputStream import InputStream
- from antlr4.Parser import Parser
- from antlr4.RuleContext import RuleContext
- from antlr4.Token import Token
- from antlr4.atn.ATNDeserializer import ATNDeserializer
- from antlr4.error.ErrorListener import ErrorListener
- from antlr4.error.Errors import LexerNoViableAltException
- from antlr4.tree.Tree import ParseTree
- from antlr4.tree.Trees import Trees
- from io import StringIO
def serializedATN():
    """Return the serialized ATN consumed by ``XPathLexer`` as one string.

    The data is kept as separate literals (one per original chunk) so that
    no escape sequence can run across a chunk boundary; the chunks are
    concatenated in order.
    """
    chunks = (
        "\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2\n",
        "\64\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t",
        "\7\4\b\t\b\4\t\t\t\3\2\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5",
        "\3\6\3\6\7\6\37\n\6\f\6\16\6\"\13\6\3\6\3\6\3\7\3\7\5",
        "\7(\n\7\3\b\3\b\3\t\3\t\7\t.\n\t\f\t\16\t\61\13\t\3\t",
        "\3\t\3/\2\n\3\5\5\6\7\7\t\b\13\t\r\2\17\2\21\n\3\2\4\7",
        "\2\62;aa\u00b9\u00b9\u0302\u0371\u2041\u2042\17\2C\\c",
        "|\u00c2\u00d8\u00da\u00f8\u00fa\u0301\u0372\u037f\u0381",
        "\u2001\u200e\u200f\u2072\u2191\u2c02\u2ff1\u3003\ud801",
        "\uf902\ufdd1\ufdf2\uffff\64\2\3\3\2\2\2\2\5\3\2\2\2\2",
        "\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\21\3\2\2\2\3\23",
        "\3\2\2\2\5\26\3\2\2\2\7\30\3\2\2\2\t\32\3\2\2\2\13\34",
        "\3\2\2\2\r\'\3\2\2\2\17)\3\2\2\2\21+\3\2\2\2\23\24\7\61",
        "\2\2\24\25\7\61\2\2\25\4\3\2\2\2\26\27\7\61\2\2\27\6\3",
        "\2\2\2\30\31\7,\2\2\31\b\3\2\2\2\32\33\7#\2\2\33\n\3\2",
        "\2\2\34 \5\17\b\2\35\37\5\r\7\2\36\35\3\2\2\2\37\"\3\2",
        "\2\2 \36\3\2\2\2 !\3\2\2\2!#\3\2\2\2\" \3\2\2\2#$\b\6",
        "\2\2$\f\3\2\2\2%(\5\17\b\2&(\t\2\2\2\'%\3\2\2\2\'&\3\2",
        "\2\2(\16\3\2\2\2)*\t\3\2\2*\20\3\2\2\2+/\7)\2\2,.\13\2",
        "\2\2-,\3\2\2\2.\61\3\2\2\2/\60\3\2\2\2/-\3\2\2\2\60\62",
        "\3\2\2\2\61/\3\2\2\2\62\63\7)\2\2\63\22\3\2\2\2\6\2 \'",
        "/\3\3\6\2",
    )
    return "".join(chunks)
class XPathLexer(Lexer):
    """Lexer for the XPath-like path strings understood by ``XPath``.

    Identifiers are first matched by the ``ID`` rule and then re-typed to
    ``TOKEN_REF`` or ``RULE_REF`` by :meth:`ID_action`, depending on the case
    of their first character.
    """

    # ATN shared by every lexer instance, plus one DFA per decision state.
    atn = ATNDeserializer().deserialize(serializedATN())

    decisionsToDFA = [ DFA(state, index) for index, state in enumerate(atn.decisionToState) ]

    # Token types.
    TOKEN_REF = 1
    RULE_REF = 2
    ANYWHERE = 3
    ROOT = 4
    WILDCARD = 5
    BANG = 6
    ID = 7
    STRING = 8

    modeNames = [ "DEFAULT_MODE" ]

    literalNames = [ "<INVALID>", "'//'", "'/'", "'*'", "'!'" ]

    symbolicNames = [ "<INVALID>", "TOKEN_REF", "RULE_REF", "ANYWHERE",
                      "ROOT", "WILDCARD", "BANG", "ID", "STRING" ]

    ruleNames = [ "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID",
                  "NameChar", "NameStartChar", "STRING" ]

    grammarFileName = "XPathLexer.g4"

    def __init__(self, input=None):
        """Create a lexer reading from *input* (a character stream, or None)."""
        super().__init__(input)
        self.checkVersion("4.9.1")
        self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache())
        # The action table is built lazily on the first call to action().
        self._actions = None
        self._predicates = None

    def action(self, localctx:RuleContext, ruleIndex:int, actionIndex:int):
        """Dispatch the embedded action registered for lexer rule *ruleIndex*.

        Raises:
            Exception: if no action is registered for *ruleIndex*.
        """
        if self._actions is None:
            # Rule index 4 is "ID" in ruleNames.
            self._actions = {4: self.ID_action}
        handler = self._actions.get(ruleIndex, None)
        if handler is None:
            raise Exception("No registered action for: %d" % ruleIndex)
        handler(localctx, actionIndex)

    def ID_action(self, localctx:RuleContext , actionIndex:int):
        """Re-type an ID token: upper-case first character means TOKEN_REF,
        anything else means RULE_REF. Only action index 0 is handled."""
        if actionIndex != 0:
            return
        first = self.text[0]
        self.type = XPathLexer.TOKEN_REF if first.isupper() else XPathLexer.RULE_REF
class XPath(object):
    """Evaluate a small XPath-like path language against a parse tree.

    A path is a sequence of elements. Each element may be preceded by a
    separator (``/`` or ``//``) and by ``!`` to invert the match; an element
    without a separator is treated like one preceded by ``/``. The path is
    tokenized once at construction time into ``self.elements`` and can then
    be evaluated against any number of trees.
    """

    WILDCARD = "*" # word not operator/separator
    NOT = "!" # word for invert operator

    def __init__(self, parser:Parser, path:str):
        """Tokenize *path* into path elements, resolving names via *parser*.

        Raises:
            Exception: if *path* cannot be tokenized or names an unknown
                token or rule (see :meth:`split`).
        """
        self.parser = parser
        self.path = path
        self.elements = self.split(path)

    def split(self, path:str):
        """Tokenize *path* and return the corresponding list of path elements.

        Raises:
            Exception: on characters the lexer cannot match, on a separator
                or ``!`` that is not followed by a path element, on an
                unexpected token, or on an unknown token/rule name.
        """
        stream = InputStream(path)
        lexer = XPathLexer(stream)

        # Make lexing errors fatal instead of letting the lexer recover.
        # A function stored on the *instance* is not bound as a method, so it
        # is called with the exception only and must not declare ``self``.
        def recover(e):
            raise e
        lexer.recover = recover
        lexer.removeErrorListeners()
        lexer.addErrorListener(ErrorListener()) # XPathErrorListener does no more
        tokenStream = CommonTokenStream(lexer)
        try:
            tokenStream.fill()
        except LexerNoViableAltException as e:
            pos = lexer.column
            msg = "Invalid tokens or characters at index %d in path '%s'" % (pos, path)
            raise Exception(msg, e) from e

        separators = (XPathLexer.ROOT, XPathLexer.ANYWHERE)
        words = (XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD, XPathLexer.STRING)

        tokens = iter(tokenStream.tokens)
        elements = list()
        for el in tokens:
            invert = False
            anywhere = False
            prefixed = False  # True once a separator or '!' has been consumed
            # Check for path separators, if none assume root
            if el.type in separators:
                anywhere = el.type == XPathLexer.ANYWHERE
                prefixed = True
                el = self._tokenAfter(tokens, el)
            # Check for bangs
            if el.type == XPathLexer.BANG:
                invert = True
                prefixed = True
                el = self._tokenAfter(tokens, el)
            # Add searched element
            if el.type in words:
                element = self.getXPathElement(el, anywhere)
                element.invert = invert
                elements.append(element)
            elif el.type == Token.EOF:
                if prefixed:
                    # A trailing '/', '//' or '!' has nothing to apply to;
                    # accepting it silently would hide a malformed path.
                    raise Exception("Missing path element at end of path")
                break
            else:
                raise Exception("Unknown path element %s" % lexer.symbolicNames[el.type])
        return elements

    @staticmethod
    def _tokenAfter(tokens, prefix):
        """Return the token following *prefix*, raising if none is left."""
        following = next(tokens, None)
        if not following:
            raise Exception('Missing element after %s' % prefix.getText())
        return following

    def getXPathElement(self, wordToken:Token, anywhere:bool):
        """Convert a word like ``*`` or ``ID`` or ``expr`` to a path element.

        *anywhere* is True if ``//`` precedes the word.

        Raises:
            Exception: if *wordToken* is EOF, or names a token or rule that
                cannot be found.
        """
        if wordToken.type == Token.EOF:
            raise Exception("Missing path element at end of path")

        word = wordToken.text
        if wordToken.type == XPathLexer.WILDCARD:
            return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement()

        if wordToken.type in (XPathLexer.TOKEN_REF, XPathLexer.STRING):
            tsource = self.parser.getTokenStream().tokenSource
            ttype = Token.INVALID_TYPE
            if wordToken.type == XPathLexer.TOKEN_REF:
                # NOTE(review): maps a lexer rule's position to its token type
                # as index + 1 — confirm this holds for the target grammar.
                if word in tsource.ruleNames:
                    ttype = tsource.ruleNames.index(word) + 1
            elif word in tsource.literalNames:
                ttype = tsource.literalNames.index(word)
            if ttype == Token.INVALID_TYPE:
                raise Exception("%s at index %d isn't a valid token name" % (word, wordToken.tokenIndex))
            return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype)

        ruleIndex = self.parser.ruleNames.index(word) if word in self.parser.ruleNames else -1
        if ruleIndex == -1:
            raise Exception("%s at index %d isn't a valid rule name" % (word, wordToken.tokenIndex))
        return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex)

    @staticmethod
    def findAll(tree:ParseTree, xpath:str, parser:Parser):
        """Shorthand for ``XPath(parser, xpath).evaluate(tree)``."""
        p = XPath(parser, xpath)
        return p.evaluate(tree)

    def evaluate(self, t:ParseTree):
        """Return a list of all nodes, starting at *t* as root, that satisfy
        the path. The root ``/`` is relative to the node passed in here.
        """
        dummyRoot = ParserRuleContext()
        dummyRoot.children = [t] # don't set t's parent.

        work = [dummyRoot]
        for element in self.elements:
            work_next = list()
            for node in work:
                # only try to match next element if it has children
                # e.g., //func/*/stat might have a token node for which
                # we can't go looking for stat nodes.
                if isinstance(node, TerminalNode) or not node.children:
                    continue
                for match in element.evaluate(node):
                    # See issue antlr#370 - Prevents XPath from returning the
                    # same node multiple times
                    if match not in work_next:
                        work_next.append(match)
            work = work_next
        return work
class XPathElement(object):
    """Base class for one element of a path: a node name plus an invert flag."""

    def __init__(self, nodeName:str):
        """Store *nodeName*; the element starts out non-inverted."""
        self.nodeName = nodeName
        self.invert = False

    def __str__(self):
        """Render as ``ClassName[name]``, or ``ClassName[!name]`` when inverted."""
        marker = "!" if self.invert else ""
        return "".join((type(self).__name__, "[", marker, self.nodeName, "]"))
- #
- # Either {@code ID} at start of path or {@code ...//ID} in middle of path.
- #
class XPathRuleAnywhereElement(XPathElement):
    """Path element matching rule contexts among the nodes returned by
    ``Trees.descendants``."""

    def __init__(self, ruleName:str, ruleIndex:int):
        super().__init__(ruleName)
        self.ruleIndex = ruleIndex

    def evaluate(self, t:ParseTree):
        """Return a lazy filter over ``Trees.descendants(t)`` keeping the
        ParserRuleContext nodes whose rule index matches (or, when inverted,
        does not match) ``self.ruleIndex``."""
        def accepts(node):
            if not isinstance(node, ParserRuleContext):
                return False
            return self.invert ^ (node.getRuleIndex() == self.ruleIndex)

        candidates = Trees.descendants(t)
        return filter(accepts, candidates)
class XPathRuleElement(XPathElement):
    """Path element matching rule contexts among the nodes returned by
    ``Trees.getChildren``."""

    def __init__(self, ruleName:str, ruleIndex:int):
        super().__init__(ruleName)
        self.ruleIndex = ruleIndex

    def evaluate(self, t:ParseTree):
        """Return a lazy filter over ``Trees.getChildren(t)`` keeping the
        ParserRuleContext nodes whose rule index matches (or, when inverted,
        does not match) ``self.ruleIndex``."""
        def accepts(node):
            if not isinstance(node, ParserRuleContext):
                return False
            return self.invert ^ (node.getRuleIndex() == self.ruleIndex)

        candidates = Trees.getChildren(t)
        return filter(accepts, candidates)
class XPathTokenAnywhereElement(XPathElement):
    """Path element matching terminal nodes among the nodes returned by
    ``Trees.descendants``."""

    def __init__(self, ruleName:str, tokenType:int):
        super().__init__(ruleName)
        self.tokenType = tokenType

    def evaluate(self, t:ParseTree):
        """Return a lazy filter over ``Trees.descendants(t)`` keeping the
        TerminalNode nodes whose symbol type matches (or, when inverted,
        does not match) ``self.tokenType``."""
        def accepts(node):
            if not isinstance(node, TerminalNode):
                return False
            return self.invert ^ (node.symbol.type == self.tokenType)

        candidates = Trees.descendants(t)
        return filter(accepts, candidates)
class XPathTokenElement(XPathElement):
    """Path element matching terminal nodes among the nodes returned by
    ``Trees.getChildren``."""

    def __init__(self, ruleName:str, tokenType:int):
        super().__init__(ruleName)
        self.tokenType = tokenType

    def evaluate(self, t:ParseTree):
        """Return a lazy filter over ``Trees.getChildren(t)`` keeping the
        TerminalNode nodes whose symbol type matches (or, when inverted,
        does not match) ``self.tokenType``."""
        def accepts(node):
            if not isinstance(node, TerminalNode):
                return False
            return self.invert ^ (node.symbol.type == self.tokenType)

        candidates = Trees.getChildren(t)
        return filter(accepts, candidates)
class XPathWildcardAnywhereElement(XPathElement):
    """Wildcard path element evaluated over ``Trees.descendants``."""

    def __init__(self):
        super().__init__(XPath.WILDCARD)

    def evaluate(self, t:ParseTree):
        """Return ``Trees.descendants(t)``, or an empty list when inverted."""
        # !* is weird but valid (empty)
        return list() if self.invert else Trees.descendants(t)
class XPathWildcardElement(XPathElement):
    """Wildcard path element evaluated over ``Trees.getChildren``."""

    def __init__(self):
        super().__init__(XPath.WILDCARD)

    def evaluate(self, t:ParseTree):
        """Return ``Trees.getChildren(t)``, or an empty list when inverted."""
        # !* is weird but valid (empty)
        return list() if self.invert else Trees.getChildren(t)
|