BufferedTokenStream.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. #
  2. # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  3. # Use of this file is governed by the BSD 3-clause license that
  4. # can be found in the LICENSE.txt file in the project root.
  5. # This implementation of {@link TokenStream} loads tokens from a
  6. # {@link TokenSource} on-demand, and places the tokens in a buffer to provide
  7. # access to any previous token by index.
  8. #
  9. # <p>
  10. # This token stream ignores the value of {@link Token#getChannel}. If your
  11. # parser requires the token stream to filter tokens to only those on a particular
  12. # channel, such as {@link Token#DEFAULT_CHANNEL} or
  13. # {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
  14. # {@link CommonTokenStream}.</p>
  15. from io import StringIO
  16. from antlr4.Token import Token
  17. from antlr4.error.Errors import IllegalStateException
# Forward declaration: bind the name at module level so it can be used in the
# parameter annotations of the classes below. Methods that need the real class
# import it locally from antlr4.Lexer at call time.
Lexer = None
  20. # this is just to keep meaningful parameter types to Parser
  21. class TokenStream(object):
  22. pass
  23. class BufferedTokenStream(TokenStream):
  24. __slots__ = ('tokenSource', 'tokens', 'index', 'fetchedEOF')
  25. def __init__(self, tokenSource:Lexer):
  26. # The {@link TokenSource} from which tokens for this stream are fetched.
  27. self.tokenSource = tokenSource
  28. # A collection of all tokens fetched from the token source. The list is
  29. # considered a complete view of the input once {@link #fetchedEOF} is set
  30. # to {@code true}.
  31. self.tokens = []
  32. # The index into {@link #tokens} of the current token (next token to
  33. # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
  34. # {@link #LT LT(1)}.
  35. #
  36. # <p>This field is set to -1 when the stream is first constructed or when
  37. # {@link #setTokenSource} is called, indicating that the first token has
  38. # not yet been fetched from the token source. For additional information,
  39. # see the documentation of {@link IntStream} for a description of
  40. # Initializing Methods.</p>
  41. self.index = -1
  42. # Indicates whether the {@link Token#EOF} token has been fetched from
  43. # {@link #tokenSource} and added to {@link #tokens}. This field improves
  44. # performance for the following cases:
  45. #
  46. # <ul>
  47. # <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
  48. # consuming the EOF symbol is optimized by checking the values of
  49. # {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
  50. # <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
  51. # {@link #tokens} is trivial with this field.</li>
  52. # <ul>
  53. self.fetchedEOF = False
  54. def mark(self):
  55. return 0
  56. def release(self, marker:int):
  57. # no resources to release
  58. pass
  59. def reset(self):
  60. self.seek(0)
  61. def seek(self, index:int):
  62. self.lazyInit()
  63. self.index = self.adjustSeekIndex(index)
  64. def get(self, index:int):
  65. self.lazyInit()
  66. return self.tokens[index]
  67. def consume(self):
  68. skipEofCheck = False
  69. if self.index >= 0:
  70. if self.fetchedEOF:
  71. # the last token in tokens is EOF. skip check if p indexes any
  72. # fetched token except the last.
  73. skipEofCheck = self.index < len(self.tokens) - 1
  74. else:
  75. # no EOF token in tokens. skip check if p indexes a fetched token.
  76. skipEofCheck = self.index < len(self.tokens)
  77. else:
  78. # not yet initialized
  79. skipEofCheck = False
  80. if not skipEofCheck and self.LA(1) == Token.EOF:
  81. raise IllegalStateException("cannot consume EOF")
  82. if self.sync(self.index + 1):
  83. self.index = self.adjustSeekIndex(self.index + 1)
  84. # Make sure index {@code i} in tokens has a token.
  85. #
  86. # @return {@code true} if a token is located at index {@code i}, otherwise
  87. # {@code false}.
  88. # @see #get(int i)
  89. #/
  90. def sync(self, i:int):
  91. n = i - len(self.tokens) + 1 # how many more elements we need?
  92. if n > 0 :
  93. fetched = self.fetch(n)
  94. return fetched >= n
  95. return True
  96. # Add {@code n} elements to buffer.
  97. #
  98. # @return The actual number of elements added to the buffer.
  99. #/
  100. def fetch(self, n:int):
  101. if self.fetchedEOF:
  102. return 0
  103. for i in range(0, n):
  104. t = self.tokenSource.nextToken()
  105. t.tokenIndex = len(self.tokens)
  106. self.tokens.append(t)
  107. if t.type==Token.EOF:
  108. self.fetchedEOF = True
  109. return i + 1
  110. return n
  111. # Get all tokens from start..stop inclusively#/
  112. def getTokens(self, start:int, stop:int, types:set=None):
  113. if start<0 or stop<0:
  114. return None
  115. self.lazyInit()
  116. subset = []
  117. if stop >= len(self.tokens):
  118. stop = len(self.tokens)-1
  119. for i in range(start, stop):
  120. t = self.tokens[i]
  121. if t.type==Token.EOF:
  122. break
  123. if types is None or t.type in types:
  124. subset.append(t)
  125. return subset
  126. def LA(self, i:int):
  127. return self.LT(i).type
  128. def LB(self, k:int):
  129. if (self.index-k) < 0:
  130. return None
  131. return self.tokens[self.index-k]
  132. def LT(self, k:int):
  133. self.lazyInit()
  134. if k==0:
  135. return None
  136. if k < 0:
  137. return self.LB(-k)
  138. i = self.index + k - 1
  139. self.sync(i)
  140. if i >= len(self.tokens): # return EOF token
  141. # EOF must be last token
  142. return self.tokens[len(self.tokens)-1]
  143. return self.tokens[i]
  144. # Allowed derived classes to modify the behavior of operations which change
  145. # the current stream position by adjusting the target token index of a seek
  146. # operation. The default implementation simply returns {@code i}. If an
  147. # exception is thrown in this method, the current stream index should not be
  148. # changed.
  149. #
  150. # <p>For example, {@link CommonTokenStream} overrides this method to ensure that
  151. # the seek target is always an on-channel token.</p>
  152. #
  153. # @param i The target token index.
  154. # @return The adjusted target token index.
  155. def adjustSeekIndex(self, i:int):
  156. return i
  157. def lazyInit(self):
  158. if self.index == -1:
  159. self.setup()
  160. def setup(self):
  161. self.sync(0)
  162. self.index = self.adjustSeekIndex(0)
  163. # Reset this token stream by setting its token source.#/
  164. def setTokenSource(self, tokenSource:Lexer):
  165. self.tokenSource = tokenSource
  166. self.tokens = []
  167. self.index = -1
  168. self.fetchedEOF = False
  169. # Given a starting index, return the index of the next token on channel.
  170. # Return i if tokens[i] is on channel. Return the index of the EOF token
  171. # if there are no tokens on channel between i and EOF.
  172. #/
  173. def nextTokenOnChannel(self, i:int, channel:int):
  174. self.sync(i)
  175. if i>=len(self.tokens):
  176. return len(self.tokens) - 1
  177. token = self.tokens[i]
  178. while token.channel!=channel:
  179. if token.type==Token.EOF:
  180. return i
  181. i += 1
  182. self.sync(i)
  183. token = self.tokens[i]
  184. return i
  185. # Given a starting index, return the index of the previous token on channel.
  186. # Return i if tokens[i] is on channel. Return -1 if there are no tokens
  187. # on channel between i and 0.
  188. def previousTokenOnChannel(self, i:int, channel:int):
  189. while i>=0 and self.tokens[i].channel!=channel:
  190. i -= 1
  191. return i
  192. # Collect all tokens on specified channel to the right of
  193. # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
  194. # EOF. If channel is -1, find any non default channel token.
  195. def getHiddenTokensToRight(self, tokenIndex:int, channel:int=-1):
  196. self.lazyInit()
  197. if tokenIndex<0 or tokenIndex>=len(self.tokens):
  198. raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
  199. from antlr4.Lexer import Lexer
  200. nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
  201. from_ = tokenIndex+1
  202. # if none onchannel to right, nextOnChannel=-1 so set to = last token
  203. to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel
  204. return self.filterForChannel(from_, to, channel)
  205. # Collect all tokens on specified channel to the left of
  206. # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
  207. # If channel is -1, find any non default channel token.
  208. def getHiddenTokensToLeft(self, tokenIndex:int, channel:int=-1):
  209. self.lazyInit()
  210. if tokenIndex<0 or tokenIndex>=len(self.tokens):
  211. raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
  212. from antlr4.Lexer import Lexer
  213. prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
  214. if prevOnChannel == tokenIndex - 1:
  215. return None
  216. # if none on channel to left, prevOnChannel=-1 then from=0
  217. from_ = prevOnChannel+1
  218. to = tokenIndex-1
  219. return self.filterForChannel(from_, to, channel)
  220. def filterForChannel(self, left:int, right:int, channel:int):
  221. hidden = []
  222. for i in range(left, right+1):
  223. t = self.tokens[i]
  224. if channel==-1:
  225. from antlr4.Lexer import Lexer
  226. if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL:
  227. hidden.append(t)
  228. elif t.channel==channel:
  229. hidden.append(t)
  230. if len(hidden)==0:
  231. return None
  232. return hidden
  233. def getSourceName(self):
  234. return self.tokenSource.getSourceName()
  235. # Get the text of all tokens in this buffer.#/
  236. def getText(self, start:int=None, stop:int=None):
  237. self.lazyInit()
  238. self.fill()
  239. if isinstance(start, Token):
  240. start = start.tokenIndex
  241. elif start is None:
  242. start = 0
  243. if isinstance(stop, Token):
  244. stop = stop.tokenIndex
  245. elif stop is None or stop >= len(self.tokens):
  246. stop = len(self.tokens) - 1
  247. if start < 0 or stop < 0 or stop < start:
  248. return ""
  249. with StringIO() as buf:
  250. for i in range(start, stop+1):
  251. t = self.tokens[i]
  252. if t.type==Token.EOF:
  253. break
  254. buf.write(t.text)
  255. return buf.getvalue()
  256. # Get all tokens from lexer until EOF#/
  257. def fill(self):
  258. self.lazyInit()
  259. while self.fetch(1000)==1000:
  260. pass