scanner.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. """
  2. Scanning and Token classes that might be useful
  3. in creating specific scanners.
  4. """
  5. import re
  6. def _namelist(instance):
  7. namelist, namedict, classlist = [], {}, [instance.__class__]
  8. for c in classlist:
  9. for b in c.__bases__:
  10. classlist.append(b)
  11. for name in list(c.__dict__.keys()):
  12. if name not in namedict:
  13. namelist.append(name)
  14. namedict[name] = 1
  15. return namelist
  16. class GenericToken:
  17. """A sample Token class that can be used in scanning"""
  18. def __init__(self, kind, attr=None):
  19. self.kind = kind
  20. self.attr = attr
  21. def __eq__(self, o):
  22. """ '==', but it's okay if offsets and linestarts are different"""
  23. if isinstance(o, GenericToken):
  24. return (self.kind == o.kind) and (self.attr == o.attr)
  25. else:
  26. return self.kind == o
  27. def __str__(self):
  28. if self.attr:
  29. return 'kind: %s, value: %r' % (self.kind, self.attr)
  30. else:
  31. return "kind: %s" % self.kind
  32. def __repr__(self):
  33. return self.attr or self.kind
  34. # Used in generic table-driven semantics routines
  35. def __hash__(self):
  36. return hash(self.attr)
  37. # Used in generic table-driven semantics routines
  38. def __getitem__(self, i):
  39. raise IndexError
  40. class GenericScanner:
  41. """A class which can be used subclass off of to make
  42. specific sets of scanners. Scanner methods that are subclassed off
  43. of this that begin with t_ will be introspected in their
  44. documentation string and uses as a regular expression in a token pattern.
  45. For example:
  46. def t_add_op(self, s):
  47. r'[+-]'
  48. t = GenericToken(kind='ADD_OP', attr=s)
  49. self.rv.append(t)
  50. """
  51. def __init__(self):
  52. pattern = self.reflect()
  53. self.pos = 0
  54. self.re = re.compile(pattern, re.VERBOSE)
  55. self.index2func = {}
  56. for name, number in self.re.groupindex.items():
  57. self.index2func[number-1] = getattr(self, 't_' + name)
  58. def makeRE(self, name):
  59. doc = getattr(self, name).__doc__
  60. rv = '(?P<%s>%s)' % (name[2:], doc)
  61. return rv
  62. def reflect(self):
  63. rv = []
  64. for name in list(_namelist(self)):
  65. if name[:2] == 't_' and name != 't_default':
  66. rv.append(self.makeRE(name))
  67. rv.append(self.makeRE('t_default'))
  68. return '|'.join(rv)
  69. def error(self, s):
  70. """Simple-minded error handler. see py2_scan for another
  71. possibility.'
  72. """
  73. print("Lexical error in %s at position %s" % (s, self.pos))
  74. raise SystemExit
  75. def tokenize(self, s):
  76. self.pos = 0
  77. n = len(s)
  78. while self.pos < n:
  79. m = self.re.match(s, self.pos)
  80. if m is None:
  81. self.error(s)
  82. groups = m.groups()
  83. for i in range(len(groups)):
  84. if groups[i] and i in self.index2func:
  85. self.index2func[i](groups[i])
  86. self.pos = m.end()
  87. def t_default(self, s):
  88. r'( \n )+'
  89. pass