lalr_interactive_parser.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
# This module provides an interactive LALR parser, used for debugging and error handling.
  2. from typing import Iterator, List
  3. from copy import copy
  4. import warnings
  5. from lark.exceptions import UnexpectedToken
  6. from lark.lexer import Token, LexerThread
  7. from .lalr_parser_state import ParserState
  8. ###{standalone
  class InteractiveParser:
      """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

      For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
      """

      def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
          """Bind the parser, its current state, and the lexer thread that ``iter_parse()`` reads from."""
          self.parser = parser
          self.parser_state = parser_state
          self.lexer_thread = lexer_thread
          # Updated by ``iter_parse()`` with the value returned by each ``feed_token()`` call.
          self.result = None

      @property
      def lexer_state(self) -> LexerThread:
          """Deprecated alias of ``lexer_thread``; emits a ``DeprecationWarning`` on access."""
          # stacklevel=2 attributes the warning to the code reading the deprecated
          # attribute rather than to this property body.
          warnings.warn(
              "lexer_state will be removed in subsequent releases. Use lexer_thread instead.",
              DeprecationWarning,
              stacklevel=2,
          )
          return self.lexer_thread

      def feed_token(self, token: Token):
          """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

          Note that ``token`` has to be an instance of ``Token``.

          Returns whatever ``parser_state.feed_token()`` returns.
          """
          # The second argument tells the parser state whether this token is the end marker.
          return self.parser_state.feed_token(token, token.type == '$END')

      def iter_parse(self) -> Iterator[Token]:
          """Step through the different stages of the parse, by reading tokens from the lexer
          and feeding them to the parser, one per iteration.

          Returns an iterator of the tokens it encounters.

          When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
          """
          for token in self.lexer_thread.lex(self.parser_state):
              # The token is yielded *before* it is fed, so the consumer sees it first.
              yield token
              self.result = self.feed_token(token)

      def exhaust_lexer(self) -> List[Token]:
          """Try to feed the rest of the lexer state into the interactive parser.

          Note that this modifies the instance in place and does not feed an '$END' Token.

          Returns the list of tokens that were fed.
          """
          return list(self.iter_parse())

      def feed_eof(self, last_token=None):
          """Feed a '$END' Token. Borrows from 'last_token' if given."""
          if last_token is not None:
              eof = Token.new_borrow_pos('$END', '', last_token)
          else:
              eof = self.lexer_thread._Token('$END', '', 0, 1, 1)
          return self.feed_token(eof)

      def __copy__(self):
          """Create a new interactive parser with a separate state.

          Calls to feed_token() won't affect the old instance, and vice-versa.
          """
          return self.copy()

      def copy(self, deepcopy_values=True):
          """Return a new instance of the same class with its own parser state and lexer thread.

          ``deepcopy_values`` is forwarded to ``parser_state.copy()``. The lexer thread
          is duplicated with ``copy.copy``, the ``parser`` object is shared, and the
          new instance's ``result`` starts out as ``None``.
          """
          return type(self)(
              self.parser,
              self.parser_state.copy(deepcopy_values=deepcopy_values),
              copy(self.lexer_thread),
          )

      def __eq__(self, other):
          """Equal when ``other`` is an ``InteractiveParser`` whose parser state and lexer thread both compare equal."""
          if not isinstance(other, InteractiveParser):
              return False

          return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread

      def as_immutable(self):
          """Convert to an ``ImmutableInteractiveParser``."""
          # Work on a copy so the immutable parser does not share state with this instance.
          p = copy(self)
          return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)

      def pretty(self):
          """Print the output of ``choices()`` in a way that's easier to read."""
          out = ["Parser choices:"]
          out.extend('\t- %s -> %r' % (k, v) for k, v in self.choices().items())
          out.append('stack size: %s' % len(self.parser_state.state_stack))
          return '\n'.join(out)

      def choices(self):
          """Returns a dictionary of token types, matched to their action in the parser.

          Only returns token types that are accepted by the current state.

          Updated by ``feed_token()``.
          """
          return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

      def accepts(self):
          """Returns the set of possible tokens that will advance the parser into a new valid state."""
          accepts = set()
          conf_no_callbacks = copy(self.parser_state.parse_conf)
          # We don't want to call callbacks here since those might have arbitrary side effects
          # and are unnecessarily slow.
          conf_no_callbacks.callbacks = {}
          for t in self.choices():
              if not t.isupper():  # only terminals are candidates
                  continue
              # Try the token on a throwaway copy so this instance is left untouched.
              new_cursor = self.copy(deepcopy_values=False)
              new_cursor.parser_state.parse_conf = conf_no_callbacks
              try:
                  new_cursor.feed_token(self.lexer_thread._Token(t, ''))
              except UnexpectedToken:
                  pass
              else:
                  accepts.add(t)
          return accepts

      def resume_parse(self):
          """Resume automated parsing from the current state.

          Delegates to ``parser.parse_from_state()``, passing the lexer thread's last token.
          """
          return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)
  class ImmutableInteractiveParser(InteractiveParser):
      """Variant of ``InteractiveParser`` whose operations hand back a fresh
      instance rather than modifying the one they were called on.
      """

      result = None

      def __hash__(self):
          # Hash over the same two attributes that the inherited ``__eq__`` compares.
          identity = (self.parser_state, self.lexer_thread)
          return hash(identity)

      def feed_token(self, token):
          """Feed ``token`` to a copy of this parser and return that copy.

          The value produced by the underlying ``InteractiveParser.feed_token()``
          is stored on the copy's ``result`` attribute; this instance is left as is.
          """
          successor = copy(self)
          successor.result = InteractiveParser.feed_token(successor, token)
          return successor

      def exhaust_lexer(self):
          """Try to feed the rest of the lexer state into the parser.

          Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token.
          """
          # Do the in-place work on a mutable copy, then convert the outcome back.
          mutable = self.as_mutable()
          mutable.exhaust_lexer()
          return mutable.as_immutable()

      def as_mutable(self):
          """Convert to an ``InteractiveParser``."""
          duplicate = copy(self)
          return InteractiveParser(
              duplicate.parser,
              duplicate.parser_state,
              duplicate.lexer_thread,
          )
  120. ###}