| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- #!/usr/bin/env python
- # Copyright (c) 2018 Rocky Bernstein
- """
- SPARK example to parse simple arithmetic expressions
- """
- import sys
- from spark_parser import GenericParser, GenericASTTraversal
- from spark_parser import AST
- from spark_parser.scanner import GenericScanner, GenericToken
- class ExprScanner(GenericScanner):
- """A simple integer expression Parser.
- Note: function parse() comes from GenericASTBuilder
- """
- def __init__(self):
- GenericScanner.__init__(self)
- def tokenize(self, input):
- self.rv = []
- GenericScanner.tokenize(self, input)
- return self.rv
- def add_token(self, name, s):
- t = GenericToken(kind=name, attr=s)
- self.rv.append(t)
- # The function names below begin with 't_'.
- # This indicates to GenericScanner that these routines
- # form the tokens. GenericScanner introspects on the
- # method names of this class and the docstrings to come
- # up with both the names of the tokens and the regular expressions
- # that make up those tokens
- # Recognize white space, but we don't create a token for it.
- # This has the effect of stripping white space between tokens
- def t_whitespace(self, s):
- r' \s+ '
- pass
- # Recognize binary operators.
- # The routines for '+' and '-' are separated from '*' and '/'
- # keep operator precidence separate.
- def t_add_op(self, s):
- r'[+-]'
- self.add_token('ADD_OP', s)
- def t_mult_op(self, s):
- r'[/*]'
- self.add_token('MULT_OP', s)
- # Recognize integers
- def t_integer(self, s):
- r'\d+'
- self.add_token('INTEGER', s)
- # Some kinds of parsing debugging options you might want to consider...
- #
- # The most verbose debugging::
- # DEFAULT_DEBUG = {'rules': True,
- # 'transition': True,
- # 'reduce' : True,
- # 'dups': True
- # }
- #
- # The kind of debugging I generally use:
- # DEFAULT_DEBUG = {'rules': False,
- # 'transition': False,
- # 'reduce' : True, # show grammar rule reductions
- # 'dups': True
- # }
- DEFAULT_DEBUG = {'rules': False, 'transition': False, 'reduce': False, 'dups': True}
- class ExprParser(GenericParser):
- """A simple expression parser for numbers and arithmetic operators: +, , *, and /.
- Note: methods that begin p_ have docstrings that are grammar rules interpreted
- by SPARK.
- """
- def __init__(self, start='expr', debug=DEFAULT_DEBUG):
- GenericParser.__init__(self, start, debug)
- # Below are methods for the grammar rules and the AST tree-building
- # action to take. The method name starts with p_, but note that the
- # the method name itself after the p_ isn't used; it is just
- # suggestive of what the grammar rules in docstring comments do.
- def p_expr_add_term(self, args):
- ' expr ::= expr ADD_OP term '
- op = 'add' if args[1].attr == '+' else 'subtract'
- return AST(op, [args[0], args[2]])
- def p_expr2term(self, args):
- ' expr ::= term '
- return AST('single', [args[0]])
- def p_term_mult_factor(self, args):
- ' term ::= term MULT_OP factor '
- op = 'multiply' if args[1].attr == '*' else 'divide'
- return AST(op, [args[0], args[2]])
- def p_term2single(self, args):
- ' term ::= factor '
- return AST('single', [args[0]])
- def p_factor2integer(self, args):
- ' factor ::= INTEGER '
- return AST('single', [args[0]])
- class Interpret(GenericASTTraversal):
- def __init__(self, ast):
- GenericASTTraversal.__init__(self, ast)
- self.postorder(ast)
- self.attr = int(ast.attr)
- # Rules for interpreting nodes based on their AST node type
- def n_integer(self, node):
- node.attr = int(node.attr)
- def n_single(self, node):
- node.attr = node.data[0].attr
- def n_multiply(self, node):
- node.attr = int(node[0].attr) * int(node[1].attr)
- def n_divide(self, node):
- node.attr = int(node[0].attr) / int(node[1].attr)
- def n_add(self, node):
- node.attr = int(node[0].attr) + int(node[1].attr)
- def n_subtract(self, node):
- node.attr = int(node[0].attr) - int(node[1].attr)
- def default(self, node):
- pass
- def scan_expression(data):
- """
- Tokenize *filename* into integers, numbers, and operators
- """
- scanner = ExprScanner()
- return scanner.tokenize(data)
- def parse_expression(tokens):
- parser = ExprParser()
- return parser.parse(tokens)
- if __name__ == '__main__':
- if len(sys.argv) == 1:
- filename = 'expr1.txt'
- data = open(filename).read()
- elif len(sys.argv) == 2:
- if sys.argv[1] in ['-h', '--help']:
- print(""""usage: %s [filename | expression ]""" %
- sys.argv[0])
- sys.exit(1)
- filename = sys.argv[1]
- data = open(filename).read()
- else:
- data = ' '.join(sys.argv[1:])
- print(data)
- tokens = scan_expression(data)
- print(tokens)
- tree = parse_expression(tokens)
- print(tree)
- i = Interpret(tree)
- print("Final value is: %d" % i.attr)
|