scanner.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. """
  2. Simple SPARK-style scanner
  3. Copyright (c) 2016-2017 Rocky Bernstein
  4. """
  5. from __future__ import print_function
  6. from spark_parser.scanner import GenericScanner, GenericToken
  7. class ExprScanner(GenericScanner):
  8. def error(self, s, pos):
  9. """Show text and a caret under that. For example:
  10. x = 2y + z
  11. ^
  12. """
  13. print("Lexical error:")
  14. print("%s" % s[:pos+10]) # + 10 for trailing context
  15. print("%s^" % (" "*(pos-1)))
  16. for t in self.rv: print(t)
  17. raise SystemExit
  18. def __init__(self):
  19. GenericScanner.__init__(self)
  20. def tokenize(self, input):
  21. self.rv = []
  22. GenericScanner.tokenize(self, input)
  23. return self.rv
  24. def add_token(self, name, s):
  25. t = GenericToken(kind=name, attr=s)
  26. self.rv.append(t)
  27. # The function names below begin with 't_'.
  28. # This indicates to GenericScanner that these routines
  29. # form the tokens. GenericScanner introspects on the
  30. # method names of this class and the docstrings to come
  31. # up with both the names of the tokens and the regular expressions
  32. # that make up those tokens
  33. # Recognize white space, but we don't create a token for it.
  34. # This has the effect of stripping white space between tokens
  35. def t_whitespace(self, s):
  36. r'\s+'
  37. pass
  38. def t_paren(self, s):
  39. r'[()]'
  40. self.add_token('LPAREN' if s == '(' else 'RPAREN', s)
  41. def t_dot(self, s):
  42. r'\.'
  43. self.add_token('DOT', s)
  44. # Recognize binary operators.
  45. # The routines for '+' and '-' are separated from '*' and '/'
  46. # keep operator precidence separate.
  47. def t_add_op(self, s):
  48. r'[+-]'
  49. self.add_token('ADD_OP', s)
  50. def t_bit_op(self, s):
  51. r'[&|^]'
  52. self.add_token('BIT_OP', s)
  53. def t_bool_op(self, s):
  54. r'or|and'
  55. self.add_token('BOOL_OP', s)
  56. def t_shift(self, s):
  57. r'<<|>>'
  58. self.add_token('SHIFT_OP', s)
  59. def t_mult(self, s):
  60. r'[*][*]|//|[/*%]'
  61. self.add_token('MULT_OP', s)
  62. # Recognize integers
  63. def t_number(self, s):
  64. r'(0x[0-9a-f]+|0b[01]+|0o[0-7]+|\d+\.\d|\d+)j?'
  65. self.add_token('NUMBER', s)
  66. # Recognize Boolean constants
  67. def t_bool(self, s):
  68. r'True|False'
  69. self.add_token('BOOL', bool(s))
  70. if __name__ == "__main__":
  71. tokens = ExprScanner().tokenize("(10.5 + 2**1 / 30) // 3 >> 1")
  72. for t in tokens:
  73. print(t)
  74. pass