| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667 |
- # Copyright (c) 2016-2017 Rocky Bernstein
- """
- More complex expression parsing
- """
- # from __future__ import print_function
- import sys
- from spark_parser.example.python2.py2_scan import ENDMARKER, Python2Scanner
- from spark_parser import GenericASTBuilder
- from spark_parser.ast import AST
# Default debug options: used as the default ``debug`` argument of
# PythonParser and as the default ``parser_debug`` of parse_python2().
# The three tracing switches ("rules", "transition", "reduce") are off here;
# the comments in parse_python2() show them switched on for grammar debugging.
DEFAULT_DEBUG = dict(
    rules=False,
    transition=False,
    reduce=False,
    errorstack="full",
    context=True,
    dups=True,
)
class PythonParser(GenericASTBuilder):
    """A more complete spark example: a Python 2 Parser.

    Note: function parse() comes from GenericASTBuilder
    """

    def __init__(self, start="file_input", debug=DEFAULT_DEBUG):
        """Create the parser.

        ``start`` is the grammar start symbol and ``debug`` the debug-option
        dictionary; both are forwarded to GenericASTBuilder.__init__ and
        also stored on the instance.
        """
        super(PythonParser, self).__init__(AST, start, debug=debug)
        self.start = start
        self.debug = debug

        # Put left-recursive list non-terminals:
        #   x ::= x y
        #   x ::=
        # nonterminal() flattens these into a single list-like node.
        self.collect = frozenset(
            (
                "stmts",
                "comments",
                "dot_names",
                "dots",
                "comp_op_exprs",
                "newline_or_stmts",
                "comma_names",
                "comma_fpdef_opt_eqtests",
            )
        )

    def debug_reduce(self, rule, tokens, parent, i):
        """Customized format and print for our kind of tokens
        which gets called in debugging grammar reduce rules

        ``rule`` is indexed as (lhs, rhs-sequence); ``parent`` is used as an
        index into ``tokens``.  When the token found there has a ``line``
        attribute, its line and column are printed in front of the rule.
        ``i`` is accepted but not used.
        """
        prefix = " "
        # Compare against None rather than relying on truthiness: index 0 is
        # a valid position and must still get its location prefix.
        if parent is not None and tokens:
            p_token = tokens[parent]
            if hasattr(p_token, "line"):
                prefix = "L.%3d.%03d: " % (p_token.line, p_token.column)
        print("%s%s ::= %s" % (prefix, rule[0], " ".join(rule[1])))

    def nonterminal(self, nt, args):
        """Build the tree node for non-terminal ``nt`` from children ``args``,
        flattening the tree where possible.

        * ``nt`` in ``self.collect`` with more than one child: the remaining
          children are appended to the first child, which is returned.
        * exactly one child that itself has length 1 and does not compare
          equal to one of the ``no_skip`` names: the node is built from that
          child's contents, skipping the intermediate level.
        * exactly two children, the second of length 0: the empty child is
          dropped; a length-1 first child is returned directly.
        * anything else is handed to GenericASTBuilder.nonterminal unchanged.
        """
        # nonterminal with a (reserved) single word derivation
        no_skip = ("pass_stmt", "continue_stmt", "break_stmt", "return_stmt")

        has_len = hasattr(args, "__len__")

        if nt in self.collect and len(args) > 1:
            #
            #  Collect iterated thingies together.
            #
            rv = args[0]
            for arg in args[1:]:
                rv.append(arg)
        elif (
            has_len
            and len(args) == 1
            and hasattr(args[0], "__len__")
            and args[0] not in no_skip
            and len(args[0]) == 1
        ):
            # Remove singleton derivations
            rv = GenericASTBuilder.nonterminal(self, nt, args[0])
            del args[0]  # save memory
        elif (
            has_len
            and len(args) == 2
            and hasattr(args[1], "__len__")
            and len(args[1]) == 0
        ):
            # Remove trailing epsilon rules, but only when there
            # are two items.
            if hasattr(args[0], "__len__") and len(args[0]) == 1:
                # Remove singleton derivation
                rv = args[0]
            else:
                rv = GenericASTBuilder.nonterminal(self, nt, args[:1])
            del args[1]  # save memory
        else:
            rv = GenericASTBuilder.nonterminal(self, nt, args)
        return rv

    ##########################################################
    # Python 2 grammar rules. Grammar rule functions
    # start with the name p_ and are collected automatically
    ##########################################################

    # The docstring below is the grammar text itself, not documentation:
    # edit it with the same care as code.
    def p_python_grammar(self, args):
        """
        ### Note: comment rules that start ## are rules from python26.gr
        ## We use them to assist checking translation to a SPARK-format grammar.

        single_input ::= NEWLINE
        single_input ::= simple_stmt
        single_input ::= compound_stmt NEWLINE
        file_input ::= newline_or_stmts ENDMARKER
        newline_or_stmts ::= newline_or_stmt*

        # Grammar uses NEWLINE instead of 'sep', but ; does separate statements.
        # The grammar is vague on how NEWLINE, INDENT, and DEDENT are computed.
        newline_or_stmt ::= sep
        newline_or_stmt ::= stmt_plus
        newline_or_stmt ::= comment sep
        stmts ::= stmt*
        stmts ::= stmt sep
        stmt_plus ::= stmt+
        eval_input ::= testlist newlines ENDMARKER
        newlines ::= NEWLINE+
        decorator ::= AT dotted_name arglist_opt NEWLINE
        arglist_opt ::= arglist?

        ## arglist ::= (argument ',')*
        ## (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
        arglist ::= argument_commas arglist2
        argument_commas ::= argument_commas argument_comma
        argument_commas ::=
        argument_comma ::= argument COMMA
        ## (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test)
        arglist2 ::= argument comma_opt
        arglist2 ::= STAR test comma_arguments comma_starstar_test_opt
        arglist2 ::= STARSTAR test
        comma_arguments ::= comma_argument*
        comma_argument ::= COMMA argument
        comma_starstar_test_opt ::= COMMA STARSTAR test
        comma_starstar_test_opt ::=
        ## Really [keyword '='] test
        ## argument ::= test [gen_for] | test '=' test
        argument ::= test gen_for_opt
        argument ::= test EQUAL test

        ## list_iter ::= list_for | list_if
        list_iter ::= list_for
        list_iter ::= list_if
        ## list_for ::= 'for' exprlist 'in' testlist_safe [list_iter]
        list_for ::= FOR exprlist IN testlist_safe list_iter_opt
        list_iter_opt ::= list_iter?
        ## list_if ::= 'if' old_test [list_iter]
        list_if ::= IF old_test list_iter_opt
        gen_for_opt ::= gen_for?
        ## gen_iter ::= gen_for | gen_if
        gen_iter ::= gen_for
        gen_iter ::= gen_if
        ## gen_for ::= 'for' exprlist 'in' or_test [gen_iter]
        gen_for ::= FOR exprlist IN or_test gen_iter_opt
        gen_iter_opt ::= gen_iter?
        ## gen_if ::= 'if' old_test [gen_iter]
        gen_if ::= IF old_test gen_iter_opt
        ## testlist1 ::= test (',' test)*
        testlist1 ::= test comma_tests

        decorators ::= decorator+
        decorated ::= decorators classdef_or_funcdef
        classdef_or_funcdef ::= classdef
        classdef_or_funcdef ::= funcdef
        funcdef ::= DEF NAME parameters COLON suite
        parameters ::= LPAREN varargslist_opt RPAREN
        varargslist_opt ::= varargslist?
        # FILL IN
        ## varargslist ::= fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME)
        ## varargslist ::= fpdef ['=' test] (',' fpdef ['=' test])* [',']
        varargslist ::= fpdef eq_test_opt comma_fpdef_opt_eqtests comma_opt
        ## (',' fpdef ['=' test])*
        comma_fpdef_opt_eqtests ::= comma_fpdef_opt_eqtests COMMA fpdef eq_test_opt
        comma_fpdef_opt_eqtests ::=
        star_names ::= star_names STAR NAME star_star_opt
        star_names ::= star_names star_star_opt
        star_names ::=
        eq_tests ::= eq_tests eq_test
        eq_tests ::=
        eq_test_opt ::= eq_test?
        eq_test ::= EQUAL test
        star_star_opt ::= COMMA STARSTAR NAME
        star_star_opt ::=
        ## fpdef ::= NAME | '(' fplist ')'
        fpdef ::= NAME
        fpdef ::= LPAREN fplist RPAREN
        ## fplist ::= fpdef (',' fpdef)* [',']
        fplist ::= fpdef fplist1 comma_opt
        ## (',' fpdef)* [',']
        fplist1 ::= fplist1 COMMA fpdef
        fplist1 ::=
        comma_opt ::= COMMA?

        stmt ::= simple_stmt
        stmt ::= compound_stmt
        simple_stmt ::= small_stmt
        small_stmt ::= expr_stmt
        small_stmt ::= print_stmt
        small_stmt ::= del_stmt
        small_stmt ::= pass_stmt
        small_stmt ::= flow_stmt
        small_stmt ::= import_stmt
        small_stmt ::= global_stmt
        small_stmt ::= exec_stmt
        small_stmt ::= assert_stmt
        ## expr_stmt ::= testlist (augassign (yield_expr|testlist)
        ## | ('=' (yield_expr|testlist))*)
        expr_stmt ::= testlist AUGASSIGN yield_expr_or_testlist
        expr_stmt ::= testlist EQUAL yield_expr_or_testlists
        yield_expr_or_testlists ::= yield_expr_or_testlists EQUAL yield_expr_or_testlist
        yield_expr_or_testlists ::= yield_expr_or_testlist
        yield_expr_or_testlist ::= yield_expr
        yield_expr_or_testlist ::= testlist
        ## yield_expr ::= 'yield' [testlist]
        yield_expr ::= YIELD testlist_opt
        print_stmt ::= PRINT test_params_or_redirect
        test_params_or_redirect ::= test comma_test_opt comma_opt
        # FIXME: go over Not quite right as there is one or more..
        test_params_or_redirect ::= REDIRECT test comma_test_opt comma_opt
        comma_test_opt ::= COMMA test
        comma_test_opt ::=
        del_stmt ::= DEL exprlist
        pass_stmt ::= PASS
        flow_stmt ::= break_stmt
        flow_stmt ::= continue_stmt
        flow_stmt ::= return_stmt
        flow_stmt ::= raise_stmt
        flow_stmt ::= yield_stmt
        break_stmt ::= BREAK
        continue_stmt ::= CONTINUE
        # return_stmt ::= 'return' [testlist]
        return_stmt ::= RETURN testlist_opt
        testlist_opt ::= testlist?
        yield_stmt ::= yield_expr
        raise_stmt ::= RAISE test_opt3
        test_opt3 ::= test COMMA test COMMA test
        test_opt3 ::= test COMMA test
        test_opt3 ::= test
        global_stmt ::= GLOBAL NAME comma_names
        comma_names ::= comma_name*
        comma_name ::= COMMA NAME
        exec_stmt ::= EXEC expr
        exec_stmt ::= EXEC expr IN test
        exec_stmt ::= EXEC expr IN test COMMA test
        assert_stmt ::= ASSERT test
        assert_stmt ::= ASSERT test COMMA test
        test_opt ::= test?

        ## exprlist ::= expr (',' expr)* [',']
        exprlist ::= expr comma_exprs comma_opt
        ## (',' expr)*
        comma_exprs ::= comma_exprs COMMA expr
        comma_exprs ::=
        # testlist ::= test (',' test)* [',']
        testlist ::= test comma_tests comma_opt
        # (',' test)*
        comma_tests ::= comma_tests COMMA test
        comma_tests ::=
        ## Backward compatibility cruft to support:
        ## [ x for x in lambda : True, lambda : False if x() ]
        ## even while also allowing:
        ## lambda x : 5 if x else 2
        ## (But not a mix of the two)
        ## testlist_safe ::= old_test [(',' old_test)+ [',']]
        testlist_safe ::= old_test testlist_safe1_opt
        testlist_safe1_opt ::= comma_old_tests comma_opt
        testlist_safe1_opt ::=
        ## (',' old_test)+
        comma_old_tests ::= comma_old_tests comma_old_test
        comma_old_tests ::= comma_old_test
        comma_old_test ::= COMMA old_test
        ## old_test ::= or_test | old_lambdef
        old_test ::= or_test
        old_test ::= old_lambdef
        ## old_lambdef ::= 'lambda' [varargslist] ':' old_test
        old_lambdef ::= LAMBDA varargslist_opt COLON old_test

        test ::= or_test IF or_test ELSE test
        test ::= or_test
        test ::= lambdef
        or_test ::= and_test or_and_tests
        ## ('or' and_test)*
        or_and_tests ::= or_and_test*
        or_and_test ::= OR and_test
        ## and_test ::= not_test ('and' not_test)*
        and_test ::= not_test and_not_tests
        ## ('and' not_test)*
        and_not_tests ::= and_not_tests AND not_test
        and_not_tests ::=
        ## not_test ::= 'not' not_test | comparison
        not_test ::= NOT not_test
        not_test ::= comparison
        ## comparison ::= expr (comp_op expr)*
        comparison ::= expr comp_op_exprs
        ## (comp_op expr)*
        comp_op_exprs ::= comp_op_exprs comp_op expr
        comp_op_exprs ::=
        comp_op ::= COMP_OP
        comp_op ::= IN
        comp_op ::= IS
        comp_op ::= IS NOT

        # Condensation of this
        ## expr ::= xor_expr ('|' xor_expr)*
        ## xor_expr ::= and_expr ('^' and_expr)*
        ## and_expr ::= shift_expr ('&' shift_expr)*
        ## shift_expr ::= arith_expr (('<<'|'>>') arith_expr)*
        ## arith_expr ::= term (('+'|'-') term)*
        ## term ::= factor (('*'|'/'|'%'|'//') factor)*
        ## We don't care about operator precedence
        expr ::= factor binop_arith_exprs
        binop_arith_exprs ::= binop_arith_exprs binop factor
        binop_arith_exprs ::=
        binop ::= BINOP
        binop ::= PLUS
        binop ::= MINUS
        binop ::= STAR
        ## factor ::= ('+'|'-'|'~') factor | power
        factor ::= op_factor factor
        factor ::= power
        op_factor ::= PLUS
        op_factor ::= MINUS
        op_factor ::= TILDE
        power ::= atom trailers starstar_factor_opt
        ## atom ::= ('(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']'
        ## | '{' [dictmaker] '}' | '`' testlist1 '`'
        ## | NAME | NUMBER | STRING+)
        atom ::= LPAREN yield_expr_or_testlist_gexp_opt RPAREN
        atom ::= LBRACKET listmaker_opt RBRACKET
        atom ::= LBRACE dictmaker_opt RBRACE
        atom ::= BACKTICK testlist1 BACKTICK
        atom ::= NUMBER
        atom ::= NAME
        atom ::= strings
        dictmaker_opt ::= dictmaker?
        ## [yield_expr|testlist_gexp]
        yield_expr_or_testlist_gexp_opt ::= yield_expr
        yield_expr_or_testlist_gexp_opt ::= testlist_gexp
        yield_expr_or_testlist_gexp_opt ::=
        listmaker_opt ::= listmaker?
        ## listmaker ::= test ( list_for | (',' test)* [','] )
        listmaker ::= test list_for_or_comma_tests_comma_opt
        list_for_or_comma_tests_comma_opt ::= list_for
        list_for_or_comma_tests_comma_opt ::= comma_tests comma_opt
        ## testlist_gexp ::= test ( gen_for | (',' test)* [','] )
        testlist_gexp ::= test gen_for_or_comma_tests_comma_opt
        gen_for_or_comma_tests_comma_opt ::= gen_for
        gen_for_or_comma_tests_comma_opt ::= comma_tests comma_opt
        lambdef ::= LAMBDA varargslist_opt COLON test
        trailers ::= trailer*
        ## trailer ::= '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
        trailer ::= LPAREN arglist_opt RPAREN
        trailer ::= LBRACKET subscriptlist RBRACKET
        trailer ::= DOT NAME
        ## subscriptlist ::= subscript (',' subscript)* [',']
        subscriptlist ::= subscript comma_subscripts comma_opt
        ## (',' subscript)*
        comma_subscripts ::= comma_subscripts comma_subscript
        comma_subscripts ::=
        ## ',' subscript
        comma_subscript ::= COMMA subscript
        ## subscript ::= '.' '.' '.' | test | [test] ':' [test] [sliceop]
        subscript ::= DOT DOT DOT
        subscript ::= test
        subscript ::= test_opt COLON test_opt sliceop_opt
        sliceop_opt ::= sliceop?
        ## sliceop ::= ':' [test]
        sliceop ::= COLON test_opt
        starstar_factor_opt ::= STARSTAR factor
        starstar_factor_opt ::=
        ## dictmaker ::= test ':' test (',' test ':' test)* [',']
        dictmaker ::= test COLON test comma_test_colon_tests comma_opt
        ## (',' test ':' test)*
        comma_test_colon_tests ::= comma_test_colon_tests comma_test_colon_test
        comma_test_colon_tests ::=
        ## (',' test ':' test)
        comma_test_colon_test ::= COMMA test COLON test

        classdef ::= CLASS NAME class_subclass_opt COLON suite
        class_subclass_opt ::= LPAREN testlist_opt RPAREN
        class_subclass_opt ::=
        strings ::= STRING+
        sep ::= comments
        sep ::= NEWLINE
        sep ::= SEMICOLON
        comments ::= comment+
        comment ::= COMMENT
        comment ::= COMMENT NEWLINE
        """

    # Import-related grammar
    # (raw docstring: one of the reference comment lines contains a backslash)
    def p_import(self, args):
        r"""
        ## import_stmt ::= import_name | import_from
        import_stmt ::= import_name
        import_stmt ::= import_from
        ## import_name ::= IMPORT dotted_as_names
        import_name ::= IMPORT dotted_as_names
        ## import_from ::= ('from' ('.'* dotted_name | '.'+)
        ## 'import' ('*' | '(' import_as_names ')' | import_as_names))
        import_from ::= FROM dots_dotted_name_or_dots import_list
        import_as_name ::= NAME
        import_as_name ::= NAME AS NAME
        dotted_as_name ::= dotted_name
        dotted_as_name ::= dotted_name AS NAME
        dots_dotted_name_or_dots ::= dots dotted_name
        dots_dotted_name_or_dots ::= DOT dots
        dots ::= DOT*
        ## 'import' ('*' | '(' import_as_names ')' | import_as_names))
        import_list ::= IMPORT STAR
        import_list ::= IMPORT LPAREN import_as_names RPAREN
        import_list ::= IMPORT import_as_names
        ## import_as_names ::= import_as_name ((',' import_as_name)+\) [',']
        # Note: we don't do the opt comma at the end
        import_as_names ::= import_as_name comma_import_as_names
        ## (',' import_as_name)+
        comma_import_as_names ::= comma_import_as_names comma_import_as_name
        comma_import_as_names ::=
        ## ',' import_as_name
        comma_import_as_name ::= COMMA import_as_name
        dotted_as_names ::= dotted_as_name comma_dotted_as_names
        comma_dotted_as_names ::= comma_dotted_as_names COMMA dotted_as_name
        comma_dotted_as_names ::=
        dotted_name ::= NAME dot_names
        dot_names ::= dot_names DOT NAME
        dot_names ::=
        """

    # Compound-statement grammar.
    # NOTE(review): compound_stmt refers to try_stmt, but no try_stmt
    # production is defined in this class (only the reference rule appears,
    # as a comment) -- confirm whether it is still to be filled in.
    def p_compund_stmt(self, args):
        """
        compound_stmt ::= if_stmt
        compound_stmt ::= while_stmt
        compound_stmt ::= for_stmt
        compound_stmt ::= try_stmt
        compound_stmt ::= with_stmt
        compound_stmt ::= funcdef
        compound_stmt ::= classdef
        compound_stmt ::= decorated
        if_stmt ::= IF test COLON suite elif_suites else_suite_opt
        if_stmt ::= IF test COLON NEWLINE suite elif_suites else_suite_opt
        elif_suites ::= elif_suites ELIF test COLON suite
        elif_suites ::=
        else_suite_opt ::= ELSE COLON suite
        else_suite_opt ::=
        ## while_stmt ::= 'while' test ':' suite ['else' ':' suite]
        while_stmt ::= WHILE test COLON suite else_suite_opt
        ## for_stmt ::= 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
        for_stmt ::= FOR exprlist IN testlist COLON suite else_colon_suite_opt
        ## ['else' ':' suite]
        else_colon_suite_opt ::= ELSE COLON suite
        else_colon_suite_opt ::=
        ## try_stmt ::= ('try' ':' suite
        ## ((except_clause ':' suite)+
        ## ['else' ':' suite]
        ## ['finally' ':' suite] |
        ## 'finally' ':' suite))
        ## with_stmt ::= 'with' test [ with_var ] ':' suite
        with_stmt ::= WITH test with_var_opt COLON suite
        with_var_opt ::= with_var?
        ## with_var ::= 'as' expr
        with_var ::= AS expr
        suite ::= stmt_plus
        suite ::= NEWLINE indent stmt_plus NEWLINE DEDENT
        suite ::= NEWLINE indent stmt_plus DEDENT
        indent ::= INDENT comments
        indent ::= INDENT
        """
def parse_python2(
    python_stmts,
    start="file_input",
    show_tokens=False,
    parser_debug=DEFAULT_DEBUG,
    check=False,
):
    """Tokenize ``python_stmts`` with Python2Scanner and parse the tokens
    with PythonParser.

    ``python_stmts`` must be a ``str``.  ``start`` is the grammar start
    symbol and ``parser_debug`` the debug-option dictionary given to the
    parser.  With ``show_tokens`` set, every token is printed before
    parsing; with ``check`` set, the parser's check_grammar() is run first.
    Returns whatever the parser's parse() returns.
    """
    assert isinstance(python_stmts, str)
    token_stream = Python2Scanner().tokenize(python_stmts)
    if show_tokens:
        for token in token_stream:
            print(token)

    # For heavy grammar debugging:
    # parser_debug = {'rules': True, 'transition': True, 'reduce': True,
    #                 'errorstack': 'full', 'context': True, 'dups': True}
    # Normal debugging:
    # parser_debug = {'rules': False, 'transition': False, 'reduce': True,
    #                 'errorstack': 'full', 'context': True, 'dups': True}
    py2_parser = PythonParser(start=start, debug=parser_debug)
    if check:
        py2_parser.check_grammar()
    return py2_parser.parse(token_stream)
# Script entry point: with no arguments, parse the built-in sample
# statements; otherwise parse the command-line arguments joined by spaces.
if __name__ == "__main__":
    if len(sys.argv) == 1:
        for python2_stmts in (
            # "if True: pass",
            # """
            # while True:
            #     if False:
            #         continue
            # """,
            # "if True: pass",
            """return f()""",
        ):
            print(python2_stmts)
            print("-" * 30)
            ast = parse_python2(
                python2_stmts + ENDMARKER,
                start="file_input",
                show_tokens=False,
                check=True,
            )
            print(ast)
            print("=" * 30)
    else:
        python2_stmts = " ".join(sys.argv[1:])
        # The default start symbol, file_input, ends in an ENDMARKER token,
        # so append the marker here just as the sample branch above does,
        # and show the resulting tree instead of discarding it.
        ast = parse_python2(
            python2_stmts + ENDMARKER,
            show_tokens=False,
            check=True,
        )
        print(ast)
|