expr.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. #!/usr/bin/env python
  2. # Copyright (c) 2018 Rocky Bernstein
  3. """
  4. SPARK example to parse simple arithmetic expressions
  5. """
  6. import sys
  7. from spark_parser import GenericParser, GenericASTTraversal
  8. from spark_parser import AST
  9. from spark_parser.scanner import GenericScanner, GenericToken
  10. class ExprScanner(GenericScanner):
  11. """A simple integer expression Parser.
  12. Note: function parse() comes from GenericASTBuilder
  13. """
  14. def __init__(self):
  15. GenericScanner.__init__(self)
  16. def tokenize(self, input):
  17. self.rv = []
  18. GenericScanner.tokenize(self, input)
  19. return self.rv
  20. def add_token(self, name, s):
  21. t = GenericToken(kind=name, attr=s)
  22. self.rv.append(t)
  23. # The function names below begin with 't_'.
  24. # This indicates to GenericScanner that these routines
  25. # form the tokens. GenericScanner introspects on the
  26. # method names of this class and the docstrings to come
  27. # up with both the names of the tokens and the regular expressions
  28. # that make up those tokens
  29. # Recognize white space, but we don't create a token for it.
  30. # This has the effect of stripping white space between tokens
  31. def t_whitespace(self, s):
  32. r' \s+ '
  33. pass
  34. # Recognize binary operators.
  35. # The routines for '+' and '-' are separated from '*' and '/'
  36. # keep operator precidence separate.
  37. def t_add_op(self, s):
  38. r'[+-]'
  39. self.add_token('ADD_OP', s)
  40. def t_mult_op(self, s):
  41. r'[/*]'
  42. self.add_token('MULT_OP', s)
  43. # Recognize integers
  44. def t_integer(self, s):
  45. r'\d+'
  46. self.add_token('INTEGER', s)
  47. # Some kinds of parsing debugging options you might want to consider...
  48. #
  49. # The most verbose debugging::
  50. # DEFAULT_DEBUG = {'rules': True,
  51. # 'transition': True,
  52. # 'reduce' : True,
  53. # 'dups': True
  54. # }
  55. #
  56. # The kind of debugging I generally use:
  57. # DEFAULT_DEBUG = {'rules': False,
  58. # 'transition': False,
  59. # 'reduce' : True, # show grammar rule reductions
  60. # 'dups': True
  61. # }
  62. DEFAULT_DEBUG = {'rules': False, 'transition': False, 'reduce': False, 'dups': True}
  63. class ExprParser(GenericParser):
  64. """A simple expression parser for numbers and arithmetic operators: +, , *, and /.
  65. Note: methods that begin p_ have docstrings that are grammar rules interpreted
  66. by SPARK.
  67. """
  68. def __init__(self, start='expr', debug=DEFAULT_DEBUG):
  69. GenericParser.__init__(self, start, debug)
  70. # Below are methods for the grammar rules and the AST tree-building
  71. # action to take. The method name starts with p_, but note that the
  72. # the method name itself after the p_ isn't used; it is just
  73. # suggestive of what the grammar rules in docstring comments do.
  74. def p_expr_add_term(self, args):
  75. ' expr ::= expr ADD_OP term '
  76. op = 'add' if args[1].attr == '+' else 'subtract'
  77. return AST(op, [args[0], args[2]])
  78. def p_expr2term(self, args):
  79. ' expr ::= term '
  80. return AST('single', [args[0]])
  81. def p_term_mult_factor(self, args):
  82. ' term ::= term MULT_OP factor '
  83. op = 'multiply' if args[1].attr == '*' else 'divide'
  84. return AST(op, [args[0], args[2]])
  85. def p_term2single(self, args):
  86. ' term ::= factor '
  87. return AST('single', [args[0]])
  88. def p_factor2integer(self, args):
  89. ' factor ::= INTEGER '
  90. return AST('single', [args[0]])
  91. class Interpret(GenericASTTraversal):
  92. def __init__(self, ast):
  93. GenericASTTraversal.__init__(self, ast)
  94. self.postorder(ast)
  95. self.attr = int(ast.attr)
  96. # Rules for interpreting nodes based on their AST node type
  97. def n_integer(self, node):
  98. node.attr = int(node.attr)
  99. def n_single(self, node):
  100. node.attr = node.data[0].attr
  101. def n_multiply(self, node):
  102. node.attr = int(node[0].attr) * int(node[1].attr)
  103. def n_divide(self, node):
  104. node.attr = int(node[0].attr) / int(node[1].attr)
  105. def n_add(self, node):
  106. node.attr = int(node[0].attr) + int(node[1].attr)
  107. def n_subtract(self, node):
  108. node.attr = int(node[0].attr) - int(node[1].attr)
  109. def default(self, node):
  110. pass
  111. def scan_expression(data):
  112. """
  113. Tokenize *filename* into integers, numbers, and operators
  114. """
  115. scanner = ExprScanner()
  116. return scanner.tokenize(data)
  117. def parse_expression(tokens):
  118. parser = ExprParser()
  119. return parser.parse(tokens)
  120. if __name__ == '__main__':
  121. if len(sys.argv) == 1:
  122. filename = 'expr1.txt'
  123. data = open(filename).read()
  124. elif len(sys.argv) == 2:
  125. if sys.argv[1] in ['-h', '--help']:
  126. print(""""usage: %s [filename | expression ]""" %
  127. sys.argv[0])
  128. sys.exit(1)
  129. filename = sys.argv[1]
  130. data = open(filename).read()
  131. else:
  132. data = ' '.join(sys.argv[1:])
  133. print(data)
  134. tokens = scan_expression(data)
  135. print(tokens)
  136. tree = parse_expression(tokens)
  137. print(tree)
  138. i = Interpret(tree)
  139. print("Final value is: %d" % i.attr)