# lalr_parser.py
  1. """This module implements a LALR(1) Parser
  2. """
  3. # Author: Erez Shinan (2017)
  4. # Email : erezshin@gmail.com
  5. from typing import Dict, Any, Optional
  6. from ..lexer import Token, LexerThread
  7. from ..utils import Serialize
  8. from ..common import ParserConf, ParserCallbacks
  9. from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase
  10. from .lalr_interactive_parser import InteractiveParser
  11. from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
  12. from .lalr_parser_state import ParserState, ParseConf
  13. ###{standalone
  14. class LALR_Parser(Serialize):
  15. def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
  16. analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
  17. analysis.compute_lalr()
  18. callbacks = parser_conf.callbacks
  19. self._parse_table = analysis.parse_table
  20. self.parser_conf = parser_conf
  21. self.parser = _Parser(analysis.parse_table, callbacks, debug)
  22. @classmethod
  23. def deserialize(cls, data, memo, callbacks, debug=False):
  24. inst = cls.__new__(cls)
  25. inst._parse_table = IntParseTable.deserialize(data, memo)
  26. inst.parser = _Parser(inst._parse_table, callbacks, debug)
  27. return inst
  28. def serialize(self, memo: Any = None) -> Dict[str, Any]:
  29. return self._parse_table.serialize(memo)
  30. def parse_interactive(self, lexer: LexerThread, start: str):
  31. return self.parser.parse(lexer, start, start_interactive=True)
  32. def parse(self, lexer, start, on_error=None):
  33. try:
  34. return self.parser.parse(lexer, start)
  35. except UnexpectedInput as e:
  36. if on_error is None:
  37. raise
  38. while True:
  39. if isinstance(e, UnexpectedCharacters):
  40. s = e.interactive_parser.lexer_thread.state
  41. p = s.line_ctr.char_pos
  42. if not on_error(e):
  43. raise e
  44. if isinstance(e, UnexpectedCharacters):
  45. # If user didn't change the character position, then we should
  46. if p == s.line_ctr.char_pos:
  47. s.line_ctr.feed(s.text.text[p:p+1])
  48. try:
  49. return e.interactive_parser.resume_parse()
  50. except UnexpectedToken as e2:
  51. if (isinstance(e, UnexpectedToken)
  52. and e.token.type == e2.token.type == '$END'
  53. and e.interactive_parser == e2.interactive_parser):
  54. # Prevent infinite loop
  55. raise e2
  56. e = e2
  57. except UnexpectedCharacters as e2:
  58. e = e2
  59. class _Parser:
  60. parse_table: ParseTableBase
  61. callbacks: ParserCallbacks
  62. debug: bool
  63. def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
  64. self.parse_table = parse_table
  65. self.callbacks = callbacks
  66. self.debug = debug
  67. def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
  68. parse_conf = ParseConf(self.parse_table, self.callbacks, start)
  69. parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
  70. if start_interactive:
  71. return InteractiveParser(self, parser_state, parser_state.lexer)
  72. return self.parse_from_state(parser_state)
  73. def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
  74. """Run the main LALR parser loop
  75. Parameters:
  76. state - the initial state. Changed in-place.
  77. last_token - Used only for line information in case of an empty lexer.
  78. """
  79. try:
  80. token = last_token
  81. for token in state.lexer.lex(state):
  82. assert token is not None
  83. state.feed_token(token)
  84. end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
  85. return state.feed_token(end_token, True)
  86. except UnexpectedInput as e:
  87. try:
  88. e.interactive_parser = InteractiveParser(self, state, state.lexer)
  89. except NameError:
  90. pass
  91. raise e
  92. except Exception as e:
  93. if self.debug:
  94. print("")
  95. print("STATE STACK DUMP")
  96. print("----------------")
  97. for i, s in enumerate(state.state_stack):
  98. print('%d)' % i , s)
  99. print("")
  100. raise
  101. ###}