parser.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. # Copyright (c) 2017 Rocky Bernstein
  2. """
  3. Parsing for a trepan2/trepan3k debugger
  4. "breakpoint", "list", or "disasm" command arguments
  5. This is a debugger location along with:
  6. - optional condition parsing for breakpoint commands
  7. - a range or count for "list" commands
  8. """
  9. from __future__ import print_function
  10. import sys
  11. from spark_parser.ast import AST
  12. from gdbloc.scanner import LocationScanner, ScannerError
  13. from spark_parser import GenericASTBuilder, DEFAULT_DEBUG
  14. class LocationError(Exception):
  15. def __init__(self, text, text_cursor):
  16. self.text = text
  17. self.text_cursor = text_cursor
  18. def __str__(self):
  19. return self.text + "\n" + self.text_cursor
  20. class LocationParser(GenericASTBuilder):
  21. """Location parsing as used in trepan2 and trepan3k
  22. for list, breakpoint, and assembly commands
  23. Note: function parse() comes from GenericASTBuilder
  24. """
  25. def __init__(self, start_nt, text, debug=DEFAULT_DEBUG):
  26. super(LocationParser, self).__init__(AST, start_nt, debug=debug)
  27. self.debug = debug
  28. self.text = text
  29. def error(self, tokens, index):
  30. token = tokens[index]
  31. if self.debug.get('local_print', False):
  32. print(self.text)
  33. print(' ' * (token.offset + len(str(token.value))) + '^')
  34. print("Syntax error at or near token '%s'" % token.value)
  35. if 'context' in self.debug and self.debug['context']:
  36. super(LocationParser, self).error(tokens, index)
  37. raise LocationError(self.text,
  38. ' ' * (token.offset + len(str(token.value))) + '^')
  39. def nonterminal(self, nt, args):
  40. has_len = hasattr(args, '__len__')
  41. collect = ('tokens',)
  42. if nt in collect:
  43. #
  44. # Collect iterated thingies together.
  45. #
  46. rv = args[0]
  47. for arg in args[1:]:
  48. rv.append(arg)
  49. if (has_len and len(args) == 1 and
  50. hasattr(args[0], '__len__') and len(args[0]) == 1):
  51. # Remove singleton derivations
  52. rv = GenericASTBuilder.nonterminal(self, nt, args[0])
  53. del args[0] # save memory
  54. else:
  55. rv = GenericASTBuilder.nonterminal(self, nt, args)
  56. return rv
  57. ##########################################################
  58. # Expression grammar rules. Grammar rule functions
  59. # start with the name p_ and are collected automatically
  60. ##########################################################
  61. def p_bp_location(self, args):
  62. '''
  63. bp_start ::= opt_space location_if opt_space
  64. '''
  65. # "disasm" command range which might refer to locations, ranges, and addresses
  66. def p_asm_range(self, args):
  67. '''
  68. arange_start ::= opt_space arange
  69. arange ::= range
  70. arange ::= addr_location opt_space COMMA opt_space NUMBER
  71. arange ::= addr_location opt_space COMMA opt_space OFFSET
  72. arange ::= addr_location opt_space COMMA opt_space ADDRESS
  73. arange ::= location opt_space COMMA opt_space ADDRESS
  74. arange ::= addr_location opt_space COMMA
  75. arange ::= addr_location
  76. # Unlike ranges, We don't allow ending at an address
  77. # arange ::= COMMA opt_space addr_location
  78. addr_location ::= location
  79. addr_location ::= ADDRESS
  80. '''
  81. # "list" command range which may refer to locations
  82. def p_list_range(self, args):
  83. '''
  84. range_start ::= opt_space range
  85. range ::= location opt_space COMMA opt_space NUMBER
  86. range ::= location opt_space COMMA opt_space OFFSET
  87. range ::= COMMA opt_space location
  88. range ::= location opt_space COMMA
  89. range ::= location
  90. range ::= DIRECTION
  91. '''
  92. # location that is used in breakpoints, list commands, and disassembly
  93. def p_location(self, args):
  94. '''
  95. opt_space ::= SPACE?
  96. location_if ::= location
  97. location_if ::= location SPACE IF tokens
  98. # Note no space is allowed between FILENAME and NUMBER
  99. location ::= FILENAME COLON NUMBER
  100. location ::= FUNCNAME
  101. # If just a number is given, the the filename is implied
  102. location ::= NUMBER
  103. location ::= METHOD
  104. location ::= OFFSET
  105. # For tokens we accept anything. Were really just
  106. # going to use the underlying string from the part
  107. # after "if". So below we all of the possible tokens
  108. tokens ::= token+
  109. token ::= COLON
  110. token ::= COMMA
  111. token ::= DIRECTION
  112. token ::= FILENAME
  113. token ::= FUNCNAME
  114. token ::= NUMBER
  115. token ::= OFFSET
  116. token ::= SPACE
  117. '''
  118. def parse_location(start_symbol, text, out=sys.stdout,
  119. show_tokens=False, parser_debug=DEFAULT_DEBUG):
  120. assert isinstance(text, str)
  121. tokens = LocationScanner().tokenize(text)
  122. if show_tokens:
  123. for t in tokens:
  124. print(t)
  125. # For heavy grammar debugging
  126. # parser_debug = {'rules': True, 'transition': True, 'reduce': True,
  127. # 'errorstack': True, 'dups': True}
  128. # parser_debug = {'rules': False, 'transition': False, 'reduce': True,
  129. # 'errorstack': 'full', 'dups': False}
  130. parser = LocationParser(start_symbol, text, parser_debug)
  131. parser.check_grammar(frozenset(('bp_start', 'range_start', 'arange_start')))
  132. return parser.parse(tokens)
  133. def parse_bp_location(*args, **kwargs):
  134. return parse_location('bp_start', *args, **kwargs)
  135. def parse_range(*args, **kwargs):
  136. return parse_location('range_start', *args, **kwargs)
  137. def parse_arange(*args, **kwargs):
  138. return parse_location('arange_start', *args, **kwargs)
  139. if __name__ == '__main__':
  140. def doit(fn, line):
  141. try:
  142. ast = fn(line, show_tokens=True)
  143. print(ast)
  144. except ScannerError as e:
  145. print("Scanner error")
  146. print(e.text)
  147. print(e.text_cursor)
  148. except LocationError as e:
  149. print("Parser error at or near")
  150. print(e.text)
  151. print(e.text_cursor)
  152. # FIXME: we should make sure all of the below is in a unit test.
  153. lines = """
  154. /tmp/foo.py:12
  155. 12
  156. ../foo.py:5
  157. gcd()
  158. foo.py:5 if x > 1
  159. """.splitlines()
  160. for line in lines:
  161. if not line.strip():
  162. continue
  163. print("=" * 30)
  164. print(line)
  165. print("+" * 30)
  166. doit(parse_bp_location, line)
  167. # bad_lines = """
  168. # /tmp/foo.py
  169. # '''/tmp/foo.py'''
  170. # /tmp/foo.py 12
  171. # gcd()
  172. # foo.py if x > 1
  173. # """.splitlines()
  174. # for line in bad_lines:
  175. # if not line.strip():
  176. # continue
  177. # print("=" * 30)
  178. # print(line)
  179. # print("+" * 30)
  180. # doit(parse_bp_location, line)
  181. # lines = """
  182. # 1
  183. # 2,
  184. # ,3
  185. # 4,10
  186. # """.splitlines()
  187. # for line in lines:
  188. # if not line.strip():
  189. # continue
  190. # print("=" * 30)
  191. # print(line)
  192. # print("+" * 30)
  193. # doit(parse_range, line)
  194. # print(ast)
  195. lines = (
  196. "*0",
  197. "*1 ,",
  198. "2 , *10",
  199. "2, 10",
  200. "*3, 10",
  201. "sys.exit() , *20"
  202. )
  203. for line in lines:
  204. line = line.strip()
  205. if not line:
  206. continue
  207. print("=" * 30)
  208. print(line)
  209. print("+" * 30)
  210. doit(parse_arange, line)