| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- """
- Scanning and Token classes that might be useful
- in creating specific scanners.
- """
- import re
def _namelist(instance):
    """Collect attribute names defined by an instance's class and its ancestors.

    The hierarchy is walked breadth-first starting at ``instance.__class__``;
    the bases of each visited class are queued behind it.  Names come back in
    first-seen order and each name appears only once, so a name defined on a
    subclass is listed before names that exist only on its bases.
    """
    seen = set()
    ordered = []
    pending = [instance.__class__]
    position = 0
    # ``pending`` grows while it is being scanned, so walk it by index.
    while position < len(pending):
        klass = pending[position]
        position += 1
        pending.extend(klass.__bases__)
        for attr_name in tuple(klass.__dict__):
            if attr_name in seen:
                continue
            seen.add(attr_name)
            ordered.append(attr_name)
    return ordered
class GenericToken:
    """A sample Token class that can be used in scanning.

    Attributes:
        kind: the token's type.
        attr: optional value associated with the token (``None`` if absent).
    """

    def __init__(self, kind, attr=None):
        self.kind = kind
        self.attr = attr

    def __eq__(self, o):
        """Compare with another token or with a bare kind.

        Two tokens are equal when both ``kind`` and ``attr`` match.  Any
        other object is compared against ``kind`` alone, so
        ``token == 'ADD_OP'`` tests the token's type.
        """
        if isinstance(o, GenericToken):
            return (self.kind == o.kind) and (self.attr == o.attr)
        return self.kind == o

    def __str__(self):
        """Return a readable description; a falsy ``attr`` is left out."""
        if self.attr:
            return 'kind: %s, value: %r' % (self.kind, self.attr)
        return "kind: %s" % self.kind

    def __repr__(self):
        """Return ``attr`` if it is truthy, otherwise ``kind``, as a string."""
        # repr() raises TypeError if __repr__ returns a non-str, which
        # happened whenever attr/kind was e.g. an int; convert explicitly.
        return str(self.attr or self.kind)

    # Used in generic table-driven semantics routines.
    # The hash is derived from attr only, so a token that compares equal to
    # a plain kind value does not generally hash like that value.
    def __hash__(self):
        return hash(self.attr)

    # Used in generic table-driven semantics routines.
    # Always raising IndexError makes a token behave like an empty sequence
    # when it is indexed or iterated over.
    def __getitem__(self, i):
        raise IndexError
class GenericScanner:
    """Base class for writing regular-expression driven scanners.

    Subclasses define methods whose names begin with ``t_``.  The
    documentation string of each such method is used as a regular
    expression; all of them are joined into one alternation that is
    compiled with ``re.VERBOSE`` (so unescaped whitespace in a pattern is
    ignored).  When a pattern matches, its method is called with the
    matched text.  The ``t_default`` pattern is always tried last.

    For example:

        def t_add_op(self, s):
            r'[+-]'
            t = GenericToken(kind='ADD_OP', attr=s)
            self.rv.append(t)
    """

    def __init__(self):
        """Compile the combined token pattern and build the dispatch table."""
        pattern = self.reflect()
        self.pos = 0
        self.re = re.compile(pattern, re.VERBOSE)
        # Match.groups() is 0-based while group numbers are 1-based, hence
        # the "- 1".  Only named groups get an entry, so capturing groups
        # nested inside a token pattern are ignored during dispatch.
        self.index2func = {
            number - 1: getattr(self, 't_' + name)
            for name, number in self.re.groupindex.items()
        }

    def makeRE(self, name):
        """Return the named-group regular expression for method *name*.

        The group is named after *name* without its ``t_`` prefix and its
        body is the method's docstring.

        Raises:
            ValueError: if the method has no docstring.  Previously the
                pattern silently became the literal text ``None``.
        """
        doc = getattr(self, name).__doc__
        if doc is None:
            raise ValueError(
                "%s has no docstring to use as its token pattern" % name)
        return '(?P<%s>%s)' % (name[2:], doc)

    def reflect(self):
        """Build the full pattern from every ``t_`` method.

        Patterns appear in the order ``_namelist`` reports the method names,
        with ``t_default`` forced to the end so it is the last alternative.
        """
        rv = [
            self.makeRE(name)
            for name in _namelist(self)
            if name.startswith('t_') and name != 't_default'
        ]
        rv.append(self.makeRE('t_default'))
        return '|'.join(rv)

    def error(self, s):
        """Simple-minded error handler: report the position and exit.

        See py2_scan for another possibility.

        Raises:
            SystemExit: always.
        """
        print("Lexical error in %s at position %s" % (s, self.pos))
        raise SystemExit

    def tokenize(self, s):
        """Scan string *s*, calling the matching ``t_`` method for each token.

        ``self.pos`` holds the offset of the token being scanned.  When no
        pattern matches at the current position, or the match is empty and
        so could never advance, ``error`` is called.  If an overriding
        ``error`` returns instead of raising, scanning stops there.
        """
        self.pos = 0
        n = len(s)
        while self.pos < n:
            m = self.re.match(s, self.pos)
            # A zero-width match would leave self.pos unchanged and loop
            # forever, so it is reported like a failed match.
            if m is None or m.end() == self.pos:
                self.error(s)
                return
            for i, text in enumerate(m.groups()):
                if text and i in self.index2func:
                    self.index2func[i](text)
            self.pos = m.end()

    # The docstring below is the token pattern, not documentation.  Under
    # re.VERBOSE the spaces are ignored, so it matches one or more newlines,
    # which are discarded.
    def t_default(self, s):
        r'( \n )+'
        pass
|