Token.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  2. # Use of this file is governed by the BSD 3-clause license that
  3. # can be found in the LICENSE.txt file in the project root.
  4. #
  5. # A token has properties: text, type, line, character position in the line
  6. # (so we can ignore tabs), token channel, index, and source from which
  7. # we obtained this token.
  8. from io import StringIO
  9. class Token (object):
  10. __slots__ = ('source', 'type', 'channel', 'start', 'stop', 'tokenIndex', 'line', 'column', '_text')
  11. INVALID_TYPE = 0
  12. # During lookahead operations, this "token" signifies we hit rule end ATN state
  13. # and did not follow it despite needing to.
  14. EPSILON = -2
  15. MIN_USER_TOKEN_TYPE = 1
  16. EOF = -1
  17. # All tokens go to the parser (unless skip() is called in that rule)
  18. # on a particular "channel". The parser tunes to a particular channel
  19. # so that whitespace etc... can go to the parser on a "hidden" channel.
  20. DEFAULT_CHANNEL = 0
  21. # Anything on different channel than DEFAULT_CHANNEL is not parsed
  22. # by parser.
  23. HIDDEN_CHANNEL = 1
  24. def __init__(self):
  25. self.source = None
  26. self.type = None # token type of the token
  27. self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL
  28. self.start = None # optional; return -1 if not implemented.
  29. self.stop = None # optional; return -1 if not implemented.
  30. self.tokenIndex = None # from 0..n-1 of the token object in the input stream
  31. self.line = None # line=1..n of the 1st character
  32. self.column = None # beginning of the line at which it occurs, 0..n-1
  33. self._text = None # text of the token.
  34. @property
  35. def text(self):
  36. return self._text
  37. # Explicitly set the text for this token. If {code text} is not
  38. # {@code null}, then {@link #getText} will return this value rather than
  39. # extracting the text from the input.
  40. #
  41. # @param text The explicit text of the token, or {@code null} if the text
  42. # should be obtained from the input along with the start and stop indexes
  43. # of the token.
  44. @text.setter
  45. def text(self, text:str):
  46. self._text = text
  47. def getTokenSource(self):
  48. return self.source[0]
  49. def getInputStream(self):
  50. return self.source[1]
  51. class CommonToken(Token):
  52. # An empty {@link Pair} which is used as the default value of
  53. # {@link #source} for tokens that do not have a source.
  54. EMPTY_SOURCE = (None, None)
  55. def __init__(self, source:tuple = EMPTY_SOURCE, type:int = None, channel:int=Token.DEFAULT_CHANNEL, start:int=-1, stop:int=-1):
  56. super().__init__()
  57. self.source = source
  58. self.type = type
  59. self.channel = channel
  60. self.start = start
  61. self.stop = stop
  62. self.tokenIndex = -1
  63. if source[0] is not None:
  64. self.line = source[0].line
  65. self.column = source[0].column
  66. else:
  67. self.column = -1
  68. # Constructs a new {@link CommonToken} as a copy of another {@link Token}.
  69. #
  70. # <p>
  71. # If {@code oldToken} is also a {@link CommonToken} instance, the newly
  72. # constructed token will share a reference to the {@link #text} field and
  73. # the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
  74. # be assigned the result of calling {@link #getText}, and {@link #source}
  75. # will be constructed from the result of {@link Token#getTokenSource} and
  76. # {@link Token#getInputStream}.</p>
  77. #
  78. # @param oldToken The token to copy.
  79. #
  80. def clone(self):
  81. t = CommonToken(self.source, self.type, self.channel, self.start, self.stop)
  82. t.tokenIndex = self.tokenIndex
  83. t.line = self.line
  84. t.column = self.column
  85. t.text = self.text
  86. return t
  87. @property
  88. def text(self):
  89. if self._text is not None:
  90. return self._text
  91. input = self.getInputStream()
  92. if input is None:
  93. return None
  94. n = input.size
  95. if self.start < n and self.stop < n:
  96. return input.getText(self.start, self.stop)
  97. else:
  98. return "<EOF>"
  99. @text.setter
  100. def text(self, text:str):
  101. self._text = text
  102. def __str__(self):
  103. with StringIO() as buf:
  104. buf.write("[@")
  105. buf.write(str(self.tokenIndex))
  106. buf.write(",")
  107. buf.write(str(self.start))
  108. buf.write(":")
  109. buf.write(str(self.stop))
  110. buf.write("='")
  111. txt = self.text
  112. if txt is not None:
  113. txt = txt.replace("\n","\\n")
  114. txt = txt.replace("\r","\\r")
  115. txt = txt.replace("\t","\\t")
  116. else:
  117. txt = "<no text>"
  118. buf.write(txt)
  119. buf.write("',<")
  120. buf.write(str(self.type))
  121. buf.write(">")
  122. if self.channel > 0:
  123. buf.write(",channel=")
  124. buf.write(str(self.channel))
  125. buf.write(",")
  126. buf.write(str(self.line))
  127. buf.write(":")
  128. buf.write(str(self.column))
  129. buf.write("]")
  130. return buf.getvalue()