| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- # Copyright (c) 2017 Rocky Bernstein
- """
- Parsing for a trepan2/trepan3k debugger
- "breakpoint", "list", or "disasm" command arguments
- This is a debugger location along with:
- - optional condition parsing for breakpoint commands
- - a range or count for "list" commands
- """
- from __future__ import print_function
- import sys
- from spark_parser.ast import AST
- from gdbloc.scanner import LocationScanner, ScannerError
- from spark_parser import GenericASTBuilder, DEFAULT_DEBUG
class LocationError(Exception):
    """Error raised when a location string cannot be parsed.

    Attributes:
        text: the input text that was being parsed.
        text_cursor: a line meant to be shown under ``text``; the parser
            fills it with padding followed by a ``^`` marker.
    """

    def __init__(self, text, text_cursor):
        self.text = text
        self.text_cursor = text_cursor

    def __str__(self):
        # Two-line rendering: the offending text, then the cursor line.
        return "\n".join((self.text, self.text_cursor))
class LocationParser(GenericASTBuilder):
    """Location parsing as used in trepan2 and trepan3k
    for list, breakpoint, and assembly commands.

    The grammar is given in the docstrings of the ``p_*`` methods; those
    docstrings are data, not documentation, so notes about the rules are
    kept in ``#`` comments instead.

    Note: function parse() comes from GenericASTBuilder
    """

    def __init__(self, start_nt, text, debug=DEFAULT_DEBUG):
        """Set up a parser.

        start_nt -- grammar start symbol handed to the base class.
        text     -- the string being parsed; kept only for error messages.
        debug    -- mapping of debug flags, also handed to the base class.
        """
        super(LocationParser, self).__init__(AST, start_nt, debug=debug)
        self.debug = debug
        self.text = text

    def error(self, tokens, index):
        """Report a syntax error by raising LocationError.

        The exception carries the original text and a cursor line whose
        ``^`` sits just past the end of the token at ``tokens[index]``.

        When the ``local_print`` debug flag is set, the same information is
        also printed, and if ``context`` is set as well the base-class
        error() is invoked too.

        NOTE(review): the base parser appears to call ``error(None, None)``
        when there are no tokens at all (empty input); confirm against the
        installed spark_parser. That case is turned into a LocationError
        here rather than failing on ``tokens[index]``.
        """
        if tokens is None or index is None:
            # Nothing to point at: put the cursor at the start of the text.
            raise LocationError(self.text, '^')

        token = tokens[index]
        # Built once; used both for the optional printout and the exception.
        cursor = ' ' * (token.offset + len(str(token.value))) + '^'
        if self.debug.get('local_print', False):
            print(self.text)
            print(cursor)
            print("Syntax error at or near token '%s'" % token.value)
            if 'context' in self.debug and self.debug['context']:
                super(LocationParser, self).error(tokens, index)
        raise LocationError(self.text, cursor)

    def nonterminal(self, nt, args):
        """Build the AST node for nonterminal ``nt`` from ``args``.

        A derivation consisting of a single child that itself has a single
        child is collapsed by one level; everything else is delegated
        unchanged to GenericASTBuilder.nonterminal().
        """
        has_len = hasattr(args, '__len__')

        collect = ('tokens',)
        if nt in collect:
            #
            #  Collect iterated thingies together.
            #
            # NOTE: the value bound to rv here is always replaced by one of
            # the two branches below; only the in-place append onto args[0]
            # survives. Kept as is so the resulting AST shape is unchanged.
            rv = args[0]
            for arg in args[1:]:
                rv.append(arg)

        if (has_len and len(args) == 1 and
                hasattr(args[0], '__len__') and len(args[0]) == 1):
            # Remove singleton derivations
            rv = GenericASTBuilder.nonterminal(self, nt, args[0])
            del args[0]  # save memory
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv

    ##########################################################
    # Expression grammar rules. Grammar rule functions
    # start with the name p_ and are collected automatically
    ##########################################################
    def p_bp_location(self, args):
        '''
        bp_start ::= opt_space location_if opt_space
        '''

    # "disasm" command range which might refer to locations, ranges, and addresses
    def p_asm_range(self, args):
        '''
        arange_start ::= opt_space arange
        arange ::= range
        arange ::= addr_location opt_space COMMA opt_space NUMBER
        arange ::= addr_location opt_space COMMA opt_space OFFSET
        arange ::= addr_location opt_space COMMA opt_space ADDRESS
        arange ::= location opt_space COMMA opt_space ADDRESS
        arange ::= addr_location opt_space COMMA
        arange ::= addr_location
        # Unlike ranges, We don't allow ending at an address
        # arange ::= COMMA opt_space addr_location
        addr_location ::= location
        addr_location ::= ADDRESS
        '''

    # "list" command range which may refer to locations
    def p_list_range(self, args):
        '''
        range_start ::= opt_space range
        range ::= location opt_space COMMA opt_space NUMBER
        range ::= location opt_space COMMA opt_space OFFSET
        range ::= COMMA opt_space location
        range ::= location opt_space COMMA
        range ::= location
        range ::= DIRECTION
        '''

    # location that is used in breakpoints, list commands, and disassembly
    def p_location(self, args):
        '''
        opt_space ::= SPACE?
        location_if ::= location
        location_if ::= location SPACE IF tokens
        # Note no space is allowed between FILENAME and NUMBER
        location ::= FILENAME COLON NUMBER
        location ::= FUNCNAME
        # If just a number is given, the the filename is implied
        location ::= NUMBER
        location ::= METHOD
        location ::= OFFSET
        # For tokens we accept anything. Were really just
        # going to use the underlying string from the part
        # after "if". So below we all of the possible tokens
        tokens ::= token+
        token ::= COLON
        token ::= COMMA
        token ::= DIRECTION
        token ::= FILENAME
        token ::= FUNCNAME
        token ::= NUMBER
        token ::= OFFSET
        token ::= SPACE
        '''
def parse_location(start_symbol, text, out=None,
                   show_tokens=False, parser_debug=DEFAULT_DEBUG):
    """Tokenize and parse ``text`` starting from grammar symbol ``start_symbol``.

    start_symbol -- grammar start symbol; the grammar check below names
                    'bp_start', 'range_start' and 'arange_start'.
    text         -- the string to parse; must be a ``str``.
    out          -- stream that tokens are written to when ``show_tokens``
                    is set. ``None`` (the default) means whatever
                    ``sys.stdout`` is at call time.
    show_tokens  -- if true, print each scanned token before parsing.
    parser_debug -- debug-flag mapping handed to LocationParser.

    Returns whatever LocationParser.parse() returns for the token list.
    Errors raised by the scanner or parser propagate to the caller.
    """
    assert isinstance(text, str)
    if out is None:
        # Resolved here rather than in the signature so that a sys.stdout
        # replaced after import is still honoured.
        out = sys.stdout

    tokens = LocationScanner().tokenize(text)
    if show_tokens:
        for t in tokens:
            print(t, file=out)

    # For heavy grammar debugging
    # parser_debug = {'rules': True, 'transition': True, 'reduce': True,
    #                 'errorstack': True, 'dups': True}
    # parser_debug = {'rules': False, 'transition': False, 'reduce': True,
    #                 'errorstack': 'full', 'dups': False}
    parser = LocationParser(start_symbol, text, parser_debug)
    parser.check_grammar(frozenset(('bp_start', 'range_start', 'arange_start')))
    return parser.parse(tokens)
def parse_bp_location(*args, **kwargs):
    """Parse a breakpoint location (grammar start symbol ``bp_start``).

    All arguments are forwarded to parse_location() after the start symbol.
    """
    start_symbol = 'bp_start'
    return parse_location(start_symbol, *args, **kwargs)
def parse_range(*args, **kwargs):
    """Parse a "list" command range (grammar start symbol ``range_start``).

    All arguments are forwarded to parse_location() after the start symbol.
    """
    start_symbol = 'range_start'
    return parse_location(start_symbol, *args, **kwargs)
def parse_arange(*args, **kwargs):
    """Parse a "disasm" command range (grammar start symbol ``arange_start``).

    All arguments are forwarded to parse_location() after the start symbol.
    """
    start_symbol = 'arange_start'
    return parse_location(start_symbol, *args, **kwargs)
if __name__ == '__main__':

    def doit(fn, line):
        """Run parse function ``fn`` on ``line``; print the AST or the error."""
        try:
            ast = fn(line, show_tokens=True)
        except ScannerError as e:
            print("Scanner error")
            print(e.text)
            print(e.text_cursor)
        except LocationError as e:
            print("Parser error at or near")
            print(e.text)
            print(e.text_cursor)
        else:
            print(ast)

    def show(fn, line):
        """Print a banner around ``line`` and then parse it with ``fn``."""
        print("=" * 30)
        print(line)
        print("+" * 30)
        doit(fn, line)

    # FIXME: we should make sure all of the below is in a unit test.

    # Breakpoint locations. Blank entries (such as the one before the first
    # newline of the literal) are skipped; lines are passed on unstripped.
    lines = """
/tmp/foo.py:12
12
../foo.py:5
gcd()
foo.py:5 if x > 1
""".splitlines()
    for line in lines:
        if line.strip():
            show(parse_bp_location, line)

    # bad_lines = """
    # /tmp/foo.py
    # '''/tmp/foo.py'''
    # /tmp/foo.py 12
    # gcd()
    # foo.py if x > 1
    # """.splitlines()
    # for line in bad_lines:
    #     if not line.strip():
    #         continue
    #     print("=" * 30)
    #     print(line)
    #     print("+" * 30)
    #     doit(parse_bp_location, line)

    # lines = """
    # 1
    # 2,
    # ,3
    # 4,10
    # """.splitlines()
    # for line in lines:
    #     if not line.strip():
    #         continue
    #     print("=" * 30)
    #     print(line)
    #     print("+" * 30)
    #     doit(parse_range, line)
    #     print(ast)

    # "disasm" ranges. Here each entry is stripped before it is parsed.
    lines = (
        "*0",
        "*1 ,",
        "2 , *10",
        "2, 10",
        "*3, 10",
        "sys.exit() , *20"
    )
    for line in lines:
        line = line.strip()
        if line:
            show(parse_arange, line)
|