python.lark 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. // Python 3 grammar for Lark
  2. // This grammar should parse all python 3.x code successfully.
  3. // Adapted from: https://docs.python.org/3/reference/grammar.html
  4. // Start symbols for the grammar:
  5. // single_input is a single interactive statement;
  6. // file_input is a module or sequence of commands read from an input file;
  7. // eval_input is the input for the eval() functions.
  8. // NB: compound_stmt in single_input is followed by extra NEWLINE!
  9. //
  // Entry points (start symbols); each one is a complete parse target on its own.
  // single_input: a lone newline, one simple statement, or one compound statement
  // followed by an extra newline.
  10. single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
  // file_input: any mix of blank lines and statements, including none at all.
  11. file_input: (_NEWLINE | stmt)*
  // eval_input: one expression list, optionally followed by newlines.
  12. eval_input: testlist _NEWLINE*
  // One decorator line: "@", a dotted name, an optional parenthesised argument list, newline.
  // NOTE(review): only dotted names (optionally called) are accepted here; PEP 614 (Python 3.9)
  // allows any expression after "@" -- confirm whether that needs to be supported.
  13. decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
  // One or more decorator lines stacked on top of each other.
  14. decorators: decorator+
  // A decorated class, function, or async function definition.
  15. decorated: decorators (classdef | funcdef | async_funcdef)
  // In this file async_funcdef is referenced only from `decorated`; an undecorated
  // "async def" is matched by async_stmt instead.
  16. async_funcdef: "async" funcdef
  // Function definition with an optional parameter list and an optional "->" return annotation.
  17. funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite
  // Parameter list of a "def". Three shapes are accepted:
  //   - plain parameters, optionally followed by the positional-only marker SLASH and more
  //     plain parameters, optionally followed by a trailing comma and/or star / double-star parameters;
  //   - star parameters only;
  //   - double-star parameters only.
  18. parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
  19. | starparams
  20. | kwparams
  21. SLASH: "/" // Otherwise it will completely disappear and will be indistinguishable in the result
  // "*args"-style parameter (starparam) or a bare "*" separator (starguard), then whatever follows it.
  22. starparams: (starparam | starguard) poststarparams
  23. starparam: "*" typedparam
  24. starguard: "*"
  // Parameters that come after the star: more plain parameters, then optionally kwparams.
  25. poststarparams: ("," paramvalue)* ["," kwparams]
  // "**kwargs"-style parameter; a trailing comma is accepted.
  26. kwparams: "**" typedparam ","?
  // A parameter with an optional default value ("=" test).
  27. ?paramvalue: typedparam ("=" test)?
  // A parameter name with an optional annotation (":" test).
  28. ?typedparam: name (":" test)?
  // Lambda expression; the body is a full `test`.
  29. lambdef: "lambda" [lambda_params] ":" test
  // Same as lambdef, but the body is a test_nocond (no top-level conditional expression).
  30. lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
  // Lambda parameter list. Mirrors `parameters`, but without annotations and without
  // the positional-only SLASH marker.
  31. lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
  32. | lambda_starparams
  33. | lambda_kwparams
  // A lambda parameter name with an optional default value.
  34. ?lambda_paramvalue: name ("=" test)?
  // "*" with an optional name, followed by more parameters and optionally lambda_kwparams.
  35. lambda_starparams: "*" [name] ("," lambda_paramvalue)* ["," [lambda_kwparams]]
  // "**name"; a trailing comma is accepted.
  36. lambda_kwparams: "**" name ","?
  // A statement is either a simple (single-line) statement or a compound statement.
  37. ?stmt: simple_stmt | compound_stmt
  // One or more small statements separated by ";", with an optional trailing ";", ended by a newline.
  38. ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
  39. ?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
  // A bare expression (or expression list) used as a statement.
  40. expr_stmt: testlist_star_expr
  // The three assignment forms: annotated, augmented, and plain.
  41. assign_stmt: annassign | augassign | assign
  // Annotated assignment: target ":" annotation, with an optional "=" value.
  42. annassign: testlist_star_expr ":" test ["=" test]
  // Plain assignment; the "=" part repeats, so chained assignments (a = b = c) are covered.
  43. assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
  // Augmented assignment: target, operator, then a yield expression or an expression list.
  44. augassign: testlist_star_expr augassign_op (yield_expr|testlist)
  // The "!" prefix keeps the operator token in the tree so the operator can be identified.
  45. !augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
  // A single item passes through unchanged; two or more comma-separated items, or one item
  // with a trailing comma, are aliased to `tuple`.
  46. ?testlist_star_expr: test_or_star_expr
  47. | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple
  48. | test_or_star_expr "," -> tuple
  49. // For normal and annotated assignments, additional restrictions enforced by the interpreter
  // "del" followed by one or more targets.
  50. del_stmt: "del" exprlist
  51. pass_stmt: "pass"
  // Statements that alter control flow.
  52. ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
  53. break_stmt: "break"
  54. continue_stmt: "continue"
  // "return" with an optional value (expression list).
  55. return_stmt: "return" [testlist]
  // A yield expression used as a statement.
  56. yield_stmt: yield_expr
  // Bare "raise", "raise X", or "raise X from Y".
  57. raise_stmt: "raise" [test ["from" test]]
  // Either "import a.b [as c], ..." or "from ... import ...".
  58. import_stmt: import_name | import_from
  59. import_name: "import" dotted_as_names
  60. // NOTE(review): this comment used to describe a ("." | "...") alternative that the rule below
  // no longer contains; leading dots of a relative import are matched by `dots` ("."+).
  // "..." is also a literal in `atom` (ellipsis) -- confirm that "from ... import x" still
  // lexes as separate "." tokens with the lexer in use.
  61. import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)
  // One or more "." tokens; the "!" prefix keeps them in the tree.
  62. !dots: "."+
  // "name" or "name as alias" inside a from-import.
  63. import_as_name: name ["as" name]
  // "a.b.c" or "a.b.c as alias" inside a plain import.
  64. dotted_as_name: dotted_name ["as" name]
  // Comma-separated list; a trailing comma is accepted.
  65. import_as_names: import_as_name ("," import_as_name)* [","]
  // Comma-separated list; no trailing comma is accepted.
  66. dotted_as_names: dotted_as_name ("," dotted_as_name)*
  // One or more names joined by ".".
  67. dotted_name: name ("." name)*
  68. global_stmt: "global" name ("," name)*
  69. nonlocal_stmt: "nonlocal" name ("," name)*
  // "assert" condition, with an optional second expression after a comma.
  70. assert_stmt: "assert" test ["," test]
  // Compound statements. async_funcdef is not listed here; in this file it is only
  // reachable through `decorated`.
  71. ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt
  72. | with_stmt | funcdef | classdef | decorated | async_stmt
  // "async" in front of a function definition, a with-statement, or a for-loop.
  73. async_stmt: "async" (funcdef | with_stmt | for_stmt)
  // "if" with its elif clauses grouped under `elifs`, and an optional "else" suite.
  74. if_stmt: "if" test ":" suite elifs ["else" ":" suite]
  // Zero or more elif clauses.
  75. elifs: elif_*
  76. elif_: "elif" test ":" suite
  77. while_stmt: "while" test ":" suite ["else" ":" suite]
  78. for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
  // First form needs at least one except clause, with optional "else" and "finally".
  // Second form is "try" followed directly by "finally" and is aliased to try_finally.
  79. try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
  80. | "try" ":" suite finally -> try_finally
  81. finally: "finally" ":" suite
  82. except_clauses: except_clause+
  // Bare "except:", "except E:", or "except E as name:".
  83. except_clause: "except" [test ["as" name]] ":" suite
  84. // NB compile.c makes sure that the default except clause is last
  85. with_stmt: "with" with_items ":" suite
  86. with_items: with_item ("," with_item)*
  // NOTE(review): the target after "as" is restricted to a single name here, so targets such as
  // "as (a, b)" or "as self.x" are not accepted -- confirm whether that is intended.
  87. with_item: test ["as" name]
  // Structural pattern matching: "match" subject ":" followed by an indented block of
  // one or more case clauses.
  88. match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT
  // One case clause: a pattern, an optional "if" guard, and a suite.
  89. case: "case" pattern ["if" test] ":" suite
  // Top-level pattern: an unparenthesised sequence (at least one comma is required),
  // or an as_pattern.
  90. ?pattern: sequence_item_pattern "," _sequence_pattern -> sequence_pattern
  91. | as_pattern
  // An or-pattern with an optional "as NAME" binding.
  92. ?as_pattern: or_pattern ("as" NAME)?
  // One or more closed patterns separated by "|".
  93. ?or_pattern: closed_pattern ("|" closed_pattern)*
  // Patterns that need no surrounding brackets to be unambiguous: literals, captures,
  // the "_" wildcard, dotted values, grouped patterns, bracketed sequences, mappings
  // (with or without a "**NAME" rest item), and class patterns.
  94. ?closed_pattern: literal_pattern
  95. | NAME -> capture_pattern
  96. | "_" -> any_pattern
  97. | attr_pattern
  98. | "(" as_pattern ")"
  99. | "[" _sequence_pattern "]" -> sequence_pattern
  100. | "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern
  101. | "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern
  102. | "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern
  103. | class_pattern
  // Wrapper that gives every literal its own literal_pattern node.
  104. literal_pattern: inner_literal_pattern
  // NOTE(review): only STRING (not LONG_STRING) and unsigned `number` are listed, so
  // triple-quoted strings and signed literals such as -1 are not matched -- confirm intended.
  105. ?inner_literal_pattern: "None" -> const_none
  106. | "True" -> const_true
  107. | "False" -> const_false
  108. | STRING -> string
  109. | number
  // Dotted name with at least one "."; aliased to `value`.
  110. attr_pattern: NAME ("." NAME)+ -> value
  // Plain or dotted name (zero or more "."); also aliased to `value`.
  111. name_or_attr_pattern: NAME ("." NAME)* -> value
  // key ":" pattern, where the key is a literal or a dotted value.
  112. mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern
  // Possibly empty comma-separated items with an optional trailing comma.
  113. _sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)?
  // A sequence element: an as_pattern, or "*NAME" aliased to star_pattern.
  114. ?sequence_item_pattern: as_pattern
  115. | "*" NAME -> star_pattern
  // Class pattern: plain or dotted name, then optional arguments in parentheses
  // (a trailing comma is accepted).
  116. class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")"
  // Positional patterns optionally followed by keyword patterns, or keyword patterns
  // only (aliased to no_pos_arguments).
  117. arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern]
  118. | keyws_arg_pattern -> no_pos_arguments
  119. pos_arg_pattern: as_pattern ("," as_pattern)*
  120. keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)*
  121. keyw_arg_pattern: NAME "=" as_pattern
  // Body of a compound statement: a simple statement on the same line, or a newline
  // followed by an indented block of one or more statements.
  122. suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT
  // Expression entry point: an or_test with an optional "if ... else ..." tail,
  // a lambda, or an assignment expression.
  123. ?test: or_test ("if" or_test "else" test)?
  124. | lambdef
  125. | assign_expr
  // Assignment expression (walrus): name ":=" value.
  126. assign_expr: name ":=" test
  // Expression without a top-level conditional; used by lambdef_nocond and comp_if.
  127. ?test_nocond: or_test | lambdef_nocond
  // Operator chain, one rule per precedence level, from loosest to tightest binding.
  128. ?or_test: and_test ("or" and_test)*
  129. ?and_test: not_test_ ("and" not_test_)*
  130. ?not_test_: "not" not_test_ -> not_test
  131. | comparison
  132. ?comparison: expr (comp_op expr)*
  // Starred expression: "*" followed by an expr.
  133. star_expr: "*" expr
  134. ?expr: or_expr
  135. ?or_expr: xor_expr ("|" xor_expr)*
  136. ?xor_expr: and_expr ("^" and_expr)*
  137. ?and_expr: shift_expr ("&" shift_expr)*
  138. ?shift_expr: arith_expr (_shift_op arith_expr)*
  139. ?arith_expr: term (_add_op term)*
  140. ?term: factor (_mul_op factor)*
  141. ?factor: _unary_op factor | power
  // Operator helper rules. "!" keeps the operator token; the leading "_" inlines the rule,
  // so the token appears directly among the children of the parent expression node.
  142. !_unary_op: "+"|"-"|"~"
  143. !_add_op: "+"|"-"
  144. !_shift_op: "<<"|">>"
  145. !_mul_op: "*"|"@"|"/"|"%"|"//"
  146. // <> isn't actually a valid comparison operator in Python. It's here for the
  147. // sake of a __future__ import described in PEP 401 (which really works :-)
  148. !comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
  // Exponentiation; the right operand is a `factor`, so it may itself start with a unary operator.
  149. ?power: await_expr ("**" factor)?
  // Optional "await" in front of an atom expression.
  150. ?await_expr: AWAIT? atom_expr
  // Named terminal, so the "await" token is kept in the tree.
  151. AWAIT: "await"
  // Left-recursive trailers applied to an atom: call, subscription, attribute access.
  152. ?atom_expr: atom_expr "(" [arguments] ")" -> funccall
  153. | atom_expr "[" subscriptlist "]" -> getitem
  154. | atom_expr "." name -> getattr
  155. | atom
  // Atoms: parenthesised yield, tuple / list / dict / set displays and their comprehension
  // forms, names, numbers, strings, parenthesised expressions, and the constants
  // "...", None, True, False.
  // "()" is a tuple (the inner part is optional); "{}" is a dict (its inner part is optional),
  // while a set display needs at least one element.
  156. ?atom: "(" yield_expr ")"
  157. | "(" _tuple_inner? ")" -> tuple
  158. | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
  159. | "[" _exprlist? "]" -> list
  160. | "[" comprehension{test_or_star_expr} "]" -> list_comprehension
  161. | "{" _dict_exprlist? "}" -> dict
  162. | "{" comprehension{key_value} "}" -> dict_comprehension
  163. | "{" _exprlist "}" -> set
  164. | "{" comprehension{test} "}" -> set_comprehension
  165. | name -> var
  166. | number
  167. | string_concat
  168. | "(" test ")"
  169. | "..." -> ellipsis
  170. | "None" -> const_none
  171. | "True" -> const_true
  172. | "False" -> const_false
  // One or more adjacent string literals (implicit concatenation).
  173. ?string_concat: string+
  // Contents of a non-empty tuple display: at least one comma is required, either
  // between items or trailing after a single item.
  174. _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")
  175. ?test_or_star_expr: test
  176. | star_expr
  // A single subscript passes through; several (or one with a trailing comma) form a subscript_tuple.
  177. ?subscriptlist: subscript
  178. | subscript (("," subscript)+ [","] | ",") -> subscript_tuple
  // An index expression, or a slice "[lower] : [upper] [: step]".
  179. ?subscript: test | ([test] ":" [test] [sliceop]) -> slice
  // The optional third part of a slice: ":" with an optional step.
  180. sliceop: ":" [test]
  // One expr / star_expr, or a comma-separated list of them (trailing comma accepted).
  // Used for "del" targets and "for" targets.
  181. ?exprlist: (expr|star_expr)
  182. | (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
  // A single test passes through; anything with a comma becomes a testlist_tuple.
  183. ?testlist: test | testlist_tuple
  184. testlist_tuple: test (("," test)+ [","] | ",")
  // Dict display contents: key/value pairs and "**" unpackings, trailing comma accepted.
  185. _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]
  186. key_value: test ":" test
  // List / set display contents: one or more items, trailing comma accepted.
  187. _exprlist: test_or_star_expr ("," test_or_star_expr)* [","]
  // Class definition; the parenthesised part (bases / keywords) is optional and may be empty.
  188. classdef: "class" name ["(" [arguments] ")"] ":" suite
  // Call arguments. Four shapes are accepted:
  //   - plain arguments, optionally followed by a comma and then star or double-star arguments;
  //   - star arguments only;
  //   - double-star arguments only;
  //   - a single bare comprehension (generator argument without its own parentheses).
  189. arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])?
  190. | starargs
  191. | kwargs
  192. | comprehension{test}
  // One or more "*" unpackings, then further plain arguments, then optionally kwargs.
  193. starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
  // A single "*" unpacking.
  194. stararg: "*" test
  // Keyword-argument unpacking in a call: "**" expr, optionally followed by further
  // comma-separated arguments and by further "**" unpackings (PEP 448), e.g. f(**a, x=1, **b).
  // Previously a second "**" could not be parsed, because argvalue never starts with "**".
  // Each additional unpacking is parsed as a nested kwargs node in last position, so a "**"
  // operand stays distinguishable from a plain argvalue (the "**" token itself is filtered out).
  // ( ... )? is used rather than [ ... ] so that inputs which parsed before still produce
  // exactly the same children, even when maybe_placeholders is enabled.
  195. kwargs: "**" test ("," argvalue)* ("," kwargs)?
  // A positional argument, or a keyword argument written as test "=" test.
  196. ?argvalue: test ("=" test)?
  // Rule template parameterised by the result expression: result, one or more "for"
  // clauses, and an optional single "if" filter at the end.
  // NOTE(review): as written, an "if" is accepted only once and only after the last "for";
  // forms such as "[x for a in b if a for x in a]" or "... if p if q" do not match -- confirm intended.
  197. comprehension{comp_result}: comp_result comp_fors [comp_if]
  198. comp_fors: comp_for+
  // One "for ... in ..." clause, optionally prefixed with "async".
  199. comp_for: [ASYNC] "for" exprlist "in" or_test
  // Named terminal, so the "async" token is kept in the tree.
  200. ASYNC: "async"
  // The filter condition is a test_nocond (no top-level conditional expression).
  201. ?comp_if: "if" test_nocond
  202. // not used in grammar, but may appear in "node" passed from Parser to Compiler
  203. encoding_decl: name
  // "yield" with an optional value, or "yield from" (aliased to yield_from).
  204. yield_expr: "yield" [testlist]
  205. | "yield" "from" test -> yield_from
  // Any numeric literal terminal. DECIMAL is not listed here; it is only a building block
  // of FLOAT_NUMBER.
  206. number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
  // A single-line or triple-quoted string literal.
  207. string: STRING | LONG_STRING
  208. // Other terminals
  // One or more line breaks (each followed by any tabs / spaces) and/or comments, collapsed
  // into a single token. The leading "_" keeps the token out of the parse tree.
  209. _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
  // Tabs, spaces and form feeds between tokens are skipped.
  210. %ignore /[\t \f]+/ // WS
  // A backslash at end of line (optionally followed by blanks) joins it with the next line.
  211. %ignore /\\[\t \f]*\r?\n/ // LINE_CONT
  212. %ignore COMMENT
  // _INDENT / _DEDENT are declared but not defined here, so they must be produced outside
  // this grammar (presumably by an indentation-tracking post-lexer -- confirm against the caller).
  213. %declare _INDENT _DEDENT
  214. // Python terminals
  // An identifier. "match" and "case" are listed explicitly so they remain usable as
  // ordinary names; "!" keeps the token in the tree.
  215. !name: NAME | "match" | "case"
  // A word character that is not a digit, followed by any number of word characters.
  216. NAME: /[^\W\d]\w*/
  // "#" up to, but not including, the end of the line.
  217. COMMENT: /#[^\n]*/
  // Single-line string: optional prefix (u / b / f optionally followed by r, or r followed by
  // u / b / f; case-insensitive), then a '...' or "..." body. The (?!"") / (?!'') lookaheads
  // stop this terminal from matching the opening of a triple-quoted string.
  218. STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
  // Triple-quoted string; the "s" flag lets "." match newlines so the body may span lines.
  219. LONG_STRING: /([ubf]?r?|r[ubf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
  // Digit run allowing single "_" separators; leading zeros are allowed. Building block for
  // the exponent, decimal, float and imaginary terminals below.
  220. _SPECIAL_DEC: "0".."9" ("_"? "0".."9" )*
  // Decimal integer: starts with 1-9, or consists only of zeros (not followed by 1-9).
  221. DEC_NUMBER: "1".."9" ("_"? "0".."9" )*
  222. | "0" ("_"? "0" )* /(?![1-9])/
  // The ".2" suffix gives these terminals priority 2.
  223. HEX_NUMBER.2: "0" ("x" | "X") ("_"? ("0".."9" | "a".."f" | "A".."F"))+
  224. OCT_NUMBER.2: "0" ("o" | "O") ("_"? "0".."7" )+
  225. BIN_NUMBER.2: "0" ("b" | "B") ("_"? "0".."1" )+
  // Exponent part: e / E, optional sign, digits.
  226. _EXP: ("e"|"E") ["+" | "-"] _SPECIAL_DEC
  // ".5", "1.", or "1.5" style decimal.
  227. DECIMAL: "." _SPECIAL_DEC | _SPECIAL_DEC "." _SPECIAL_DEC?
  // Digits with an exponent, or a decimal with an optional exponent.
  228. FLOAT_NUMBER.2: _SPECIAL_DEC _EXP | DECIMAL _EXP?
  // An integer-like digit run or a float, followed by j / J.
  229. IMAG_NUMBER.2: (_SPECIAL_DEC | FLOAT_NUMBER) ("J" | "j")
  230. // Comma-separated list (with an optional trailing comma)
  // Two template variants with the same body: cs_list produces its own node, while the
  // "_"-prefixed _cs_list is inlined into its parent. Neither is referenced by any rule
  // in the visible part of this grammar.
  231. cs_list{item}: item ("," item)* ","?
  232. _cs_list{item}: item ("," item)* ","?