LexerActionExecutor.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #
  2. # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  3. # Use of this file is governed by the BSD 3-clause license that
  4. # can be found in the LICENSE.txt file in the project root.
  5. #/
  6. # Represents an executor for a sequence of lexer actions which are traversed during
  7. # the matching operation of a lexer rule (token).
  8. #
  9. # <p>The executor tracks position information for position-dependent lexer actions
  10. # efficiently, ensuring that actions appearing only at the end of the rule do
  11. # not cause bloating of the {@link DFA} created for the lexer.</p>
  12. from antlr4.InputStream import InputStream
  13. from antlr4.atn.LexerAction import LexerAction, LexerIndexedCustomAction
  14. # need a forward declaration: placeholder bindings so that annotations naming these types can be evaluated when the methods below are defined
  15. Lexer = None
  16. LexerActionExecutor = None  # rebound to the real class by the class statement that follows
  17. class LexerActionExecutor(object):
  18. __slots__ = ('lexerActions', 'hashCode')
  19. def __init__(self, lexerActions:list=list()):
  20. self.lexerActions = lexerActions
  21. # Caches the result of {@link #hashCode} since the hash code is an element
  22. # of the performance-critical {@link LexerATNConfig#hashCode} operation.
  23. self.hashCode = hash("".join([str(la) for la in lexerActions]))
  24. # Creates a {@link LexerActionExecutor} which executes the actions for
  25. # the input {@code lexerActionExecutor} followed by a specified
  26. # {@code lexerAction}.
  27. #
  28. # @param lexerActionExecutor The executor for actions already traversed by
  29. # the lexer while matching a token within a particular
  30. # {@link LexerATNConfig}. If this is {@code null}, the method behaves as
  31. # though it were an empty executor.
  32. # @param lexerAction The lexer action to execute after the actions
  33. # specified in {@code lexerActionExecutor}.
  34. #
  35. # @return A {@link LexerActionExecutor} for executing the combine actions
  36. # of {@code lexerActionExecutor} and {@code lexerAction}.
  37. @staticmethod
  38. def append(lexerActionExecutor:LexerActionExecutor , lexerAction:LexerAction ):
  39. if lexerActionExecutor is None:
  40. return LexerActionExecutor([ lexerAction ])
  41. lexerActions = lexerActionExecutor.lexerActions + [ lexerAction ]
  42. return LexerActionExecutor(lexerActions)
  43. # Creates a {@link LexerActionExecutor} which encodes the current offset
  44. # for position-dependent lexer actions.
  45. #
  46. # <p>Normally, when the executor encounters lexer actions where
  47. # {@link LexerAction#isPositionDependent} returns {@code true}, it calls
  48. # {@link IntStream#seek} on the input {@link CharStream} to set the input
  49. # position to the <em>end</em> of the current token. This behavior provides
  50. # for efficient DFA representation of lexer actions which appear at the end
  51. # of a lexer rule, even when the lexer rule matches a variable number of
  52. # characters.</p>
  53. #
  54. # <p>Prior to traversing a match transition in the ATN, the current offset
  55. # from the token start index is assigned to all position-dependent lexer
  56. # actions which have not already been assigned a fixed offset. By storing
  57. # the offsets relative to the token start index, the DFA representation of
  58. # lexer actions which appear in the middle of tokens remains efficient due
  59. # to sharing among tokens of the same length, regardless of their absolute
  60. # position in the input stream.</p>
  61. #
  62. # <p>If the current executor already has offsets assigned to all
  63. # position-dependent lexer actions, the method returns {@code this}.</p>
  64. #
  65. # @param offset The current offset to assign to all position-dependent
  66. # lexer actions which do not already have offsets assigned.
  67. #
  68. # @return A {@link LexerActionExecutor} which stores input stream offsets
  69. # for all position-dependent lexer actions.
  70. #/
  71. def fixOffsetBeforeMatch(self, offset:int):
  72. updatedLexerActions = None
  73. for i in range(0, len(self.lexerActions)):
  74. if self.lexerActions[i].isPositionDependent and not isinstance(self.lexerActions[i], LexerIndexedCustomAction):
  75. if updatedLexerActions is None:
  76. updatedLexerActions = [ la for la in self.lexerActions ]
  77. updatedLexerActions[i] = LexerIndexedCustomAction(offset, self.lexerActions[i])
  78. if updatedLexerActions is None:
  79. return self
  80. else:
  81. return LexerActionExecutor(updatedLexerActions)
  82. # Execute the actions encapsulated by this executor within the context of a
  83. # particular {@link Lexer}.
  84. #
  85. # <p>This method calls {@link IntStream#seek} to set the position of the
  86. # {@code input} {@link CharStream} prior to calling
  87. # {@link LexerAction#execute} on a position-dependent action. Before the
  88. # method returns, the input position will be restored to the same position
  89. # it was in when the method was invoked.</p>
  90. #
  91. # @param lexer The lexer instance.
  92. # @param input The input stream which is the source for the current token.
  93. # When this method is called, the current {@link IntStream#index} for
  94. # {@code input} should be the start of the following token, i.e. 1
  95. # character past the end of the current token.
  96. # @param startIndex The token start index. This value may be passed to
  97. # {@link IntStream#seek} to set the {@code input} position to the beginning
  98. # of the token.
  99. #/
  100. def execute(self, lexer:Lexer, input:InputStream, startIndex:int):
  101. requiresSeek = False
  102. stopIndex = input.index
  103. try:
  104. for lexerAction in self.lexerActions:
  105. if isinstance(lexerAction, LexerIndexedCustomAction):
  106. offset = lexerAction.offset
  107. input.seek(startIndex + offset)
  108. lexerAction = lexerAction.action
  109. requiresSeek = (startIndex + offset) != stopIndex
  110. elif lexerAction.isPositionDependent:
  111. input.seek(stopIndex)
  112. requiresSeek = False
  113. lexerAction.execute(lexer)
  114. finally:
  115. if requiresSeek:
  116. input.seek(stopIndex)
  117. def __hash__(self):
  118. return self.hashCode
  119. def __eq__(self, other):
  120. if self is other:
  121. return True
  122. elif not isinstance(other, LexerActionExecutor):
  123. return False
  124. else:
  125. return self.hashCode == other.hashCode \
  126. and self.lexerActions == other.lexerActions
  127. del Lexer  # remove the placeholder bound to None above; it was only needed while the annotations were evaluated