| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- """
- Simple SPARK-style scanner
- Copyright (c) 2016-2017 Rocky Bernstein
- """
- from __future__ import print_function
- from spark_parser.scanner import GenericScanner, GenericToken
class ExprScanner(GenericScanner):
    """Scanner for a small expression language.

    Each ``t_*`` method below contributes one token pattern (its docstring)
    and an action (its body).  Tokens are accumulated as GenericToken
    objects in ``self.rv``, which ``tokenize()`` returns.
    """

    def error(self, s, pos):
        """Report a lexical error and exit.

        Prints "Lexical error:", then the input text up to ten characters
        past ``pos``, then a caret line, then every token collected so far
        in ``self.rv``, and finally raises SystemExit.  For example:

            x = 2y + z
                 ^
        """
        print("Lexical error:")
        print("%s" % s[:pos+10])  # + 10 for trailing context
        # NOTE(review): the caret is indented by pos-1 spaces.  If ``pos``
        # is the 0-based index of the offending character, this places the
        # caret one column to its left -- confirm against the caller.
        print("%s^" % (" "*(pos-1)))
        for t in self.rv:
            print(t)
        raise SystemExit

    def __init__(self):
        GenericScanner.__init__(self)

    def tokenize(self, input):
        """Scan ``input`` and return the list of tokens produced."""
        # Reset the accumulator so that each call starts from scratch.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv

    def add_token(self, name, s):
        """Append a GenericToken of kind ``name`` with attribute ``s``."""
        t = GenericToken(kind=name, attr=s)
        self.rv.append(t)

    # The function names below begin with 't_'.
    # This indicates to GenericScanner that these routines
    # form the tokens. GenericScanner introspects on the
    # method names of this class and the docstrings to come
    # up with both the names of the tokens and the regular expressions
    # that make up those tokens.
    #
    # Because the docstrings ARE the regular expressions, they must not
    # be reworded or extended with explanatory text; use comments instead.

    # Recognize white space, but we don't create a token for it.
    # This has the effect of stripping white space between tokens.
    def t_whitespace(self, s):
        r'\s+'
        pass

    # A single pattern covers both parentheses; the token kind is chosen
    # from the matched text.
    def t_paren(self, s):
        r'[()]'
        self.add_token('LPAREN' if s == '(' else 'RPAREN', s)

    def t_dot(self, s):
        r'\.'
        self.add_token('DOT', s)

    # Recognize binary operators.
    # The routines for '+' and '-' are separated from '*' and '/' to
    # keep operator precedence separate.
    def t_add_op(self, s):
        r'[+-]'
        self.add_token('ADD_OP', s)

    def t_bit_op(self, s):
        r'[&|^]'
        self.add_token('BIT_OP', s)

    def t_bool_op(self, s):
        r'or|and'
        self.add_token('BOOL_OP', s)

    def t_shift(self, s):
        r'<<|>>'
        self.add_token('SHIFT_OP', s)

    # The two-character operators '**' and '//' are listed before the
    # single-character class so that they are tried first.
    def t_mult(self, s):
        r'[*][*]|//|[/*%]'
        self.add_token('MULT_OP', s)

    # Recognize numeric literals: hex (0x..), binary (0b..), octal (0o..),
    # decimals with a fractional part, and plain integers, each with an
    # optional trailing 'j'.  The fractional part accepts one or more
    # digits so that e.g. "10.55" is a single NUMBER rather than "10.5"
    # followed by "5".
    def t_number(self, s):
        r'(0x[0-9a-f]+|0b[01]+|0o[0-7]+|\d+\.\d+|\d+)j?'
        self.add_token('NUMBER', s)

    # Recognize Boolean constants.
    # The attribute is the corresponding Python bool.  Note that bool(s)
    # would be wrong here: any non-empty string, including "False", is
    # truthy.
    def t_bool(self, s):
        r'True|False'
        self.add_token('BOOL', s == 'True')
if __name__ == "__main__":
    # Small demonstration: scan a sample expression and print each token
    # on its own line.
    demo_expr = "(10.5 + 2**1 / 30) // 3 >> 1"
    for token in ExprScanner().tokenize(demo_expr):
        print(token)
|