c_parser.py 88 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376
  1. # ------------------------------------------------------------------------------
  2. # pycparser: c_parser.py
  3. #
  4. # Recursive-descent parser for the C language.
  5. #
  6. # Eli Bendersky [https://eli.thegreenplace.net/]
  7. # License: BSD
  8. # ------------------------------------------------------------------------------
  9. from dataclasses import dataclass
  10. from typing import (
  11. Any,
  12. Dict,
  13. List,
  14. Literal,
  15. NoReturn,
  16. Optional,
  17. Tuple,
  18. TypedDict,
  19. cast,
  20. )
  21. from . import c_ast
  22. from .c_lexer import CLexer, _Token
  23. from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
  24. @dataclass
  25. class Coord:
  26. """Coordinates of a syntactic element. Consists of:
  27. - File name
  28. - Line number
  29. - Column number
  30. """
  31. file: str
  32. line: int
  33. column: Optional[int] = None
  34. def __str__(self) -> str:
  35. text = f"{self.file}:{self.line}"
  36. if self.column:
  37. text += f":{self.column}"
  38. return text
  39. class ParseError(Exception):
  40. pass
  41. class CParser:
  42. """Recursive-descent C parser.
  43. Usage:
  44. parser = CParser()
  45. ast = parser.parse(text, filename)
  46. The `lexer` parameter lets you inject a lexer class (defaults to CLexer).
  47. The parameters after `lexer` are accepted for backward compatibility with
  48. the old PLY-based parser and are otherwise unused.
  49. """
  50. def __init__(
  51. self,
  52. lex_optimize: bool = True,
  53. lexer: type[CLexer] = CLexer,
  54. lextab: str = "pycparser.lextab",
  55. yacc_optimize: bool = True,
  56. yacctab: str = "pycparser.yacctab",
  57. yacc_debug: bool = False,
  58. taboutputdir: str = "",
  59. ) -> None:
  60. self.clex: CLexer = lexer(
  61. error_func=self._lex_error_func,
  62. on_lbrace_func=self._lex_on_lbrace_func,
  63. on_rbrace_func=self._lex_on_rbrace_func,
  64. type_lookup_func=self._lex_type_lookup_func,
  65. )
  66. # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
  67. # the current (topmost) scope. Each scope is a dictionary that
  68. # specifies whether a name is a type. If _scope_stack[n][name] is
  69. # True, 'name' is currently a type in the scope. If it's False,
  70. # 'name' is used in the scope but not as a type (for instance, if we
  71. # saw: int name;
  72. # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
  73. # in this scope at all.
  74. self._scope_stack: List[Dict[str, bool]] = [dict()]
  75. self._tokens: _TokenStream = _TokenStream(self.clex)
  76. def parse(
  77. self, text: str, filename: str = "", debug: bool = False
  78. ) -> c_ast.FileAST:
  79. """Parses C code and returns an AST.
  80. text:
  81. A string containing the C source code
  82. filename:
  83. Name of the file being parsed (for meaningful
  84. error messages)
  85. debug:
  86. Deprecated debug flag (unused); for backwards compatibility.
  87. """
  88. self._scope_stack = [dict()]
  89. self.clex.input(text, filename)
  90. self._tokens = _TokenStream(self.clex)
  91. ast = self._parse_translation_unit_or_empty()
  92. tok = self._peek()
  93. if tok is not None:
  94. self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
  95. return ast
  96. # ------------------------------------------------------------------
  97. # Scope and declaration helpers
  98. # ------------------------------------------------------------------
  99. def _coord(self, lineno: int, column: Optional[int] = None) -> Coord:
  100. return Coord(file=self.clex.filename, line=lineno, column=column)
  101. def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn:
  102. raise ParseError(f"{coord}: {msg}")
  103. def _push_scope(self) -> None:
  104. self._scope_stack.append(dict())
  105. def _pop_scope(self) -> None:
  106. assert len(self._scope_stack) > 1
  107. self._scope_stack.pop()
  108. def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None:
  109. """Add a new typedef name (ie a TYPEID) to the current scope"""
  110. if not self._scope_stack[-1].get(name, True):
  111. self._parse_error(
  112. f"Typedef {name!r} previously declared as non-typedef in this scope",
  113. coord,
  114. )
  115. self._scope_stack[-1][name] = True
  116. def _add_identifier(self, name: str, coord: Optional[Coord]) -> None:
  117. """Add a new object, function, or enum member name (ie an ID) to the
  118. current scope
  119. """
  120. if self._scope_stack[-1].get(name, False):
  121. self._parse_error(
  122. f"Non-typedef {name!r} previously declared as typedef in this scope",
  123. coord,
  124. )
  125. self._scope_stack[-1][name] = False
  126. def _is_type_in_scope(self, name: str) -> bool:
  127. """Is *name* a typedef-name in the current scope?"""
  128. for scope in reversed(self._scope_stack):
  129. # If name is an identifier in this scope it shadows typedefs in
  130. # higher scopes.
  131. in_scope = scope.get(name)
  132. if in_scope is not None:
  133. return in_scope
  134. return False
  135. def _lex_error_func(self, msg: str, line: int, column: int) -> None:
  136. self._parse_error(msg, self._coord(line, column))
  137. def _lex_on_lbrace_func(self) -> None:
  138. self._push_scope()
  139. def _lex_on_rbrace_func(self) -> None:
  140. self._pop_scope()
  141. def _lex_type_lookup_func(self, name: str) -> bool:
  142. """Looks up types that were previously defined with
  143. typedef.
  144. Passed to the lexer for recognizing identifiers that
  145. are types.
  146. """
  147. return self._is_type_in_scope(name)
  148. # To understand what's going on here, read sections A.8.5 and
  149. # A.8.6 of K&R2 very carefully.
  150. #
  151. # A C type consists of a basic type declaration, with a list
  152. # of modifiers. For example:
  153. #
  154. # int *c[5];
  155. #
  156. # The basic declaration here is 'int c', and the pointer and
  157. # the array are the modifiers.
  158. #
  159. # Basic declarations are represented by TypeDecl (from module c_ast) and the
  160. # modifiers are FuncDecl, PtrDecl and ArrayDecl.
  161. #
  162. # The standard states that whenever a new modifier is parsed, it should be
  163. # added to the end of the list of modifiers. For example:
  164. #
  165. # K&R2 A.8.6.2: Array Declarators
  166. #
  167. # In a declaration T D where D has the form
  168. # D1 [constant-expression-opt]
  169. # and the type of the identifier in the declaration T D1 is
  170. # "type-modifier T", the type of the
  171. # identifier of D is "type-modifier array of T"
  172. #
  173. # This is what this method does. The declarator it receives
  174. # can be a list of declarators ending with TypeDecl. It
  175. # tacks the modifier to the end of this list, just before
  176. # the TypeDecl.
  177. #
  178. # Additionally, the modifier may be a list itself. This is
  179. # useful for pointers, that can come as a chain from the rule
  180. # p_pointer. In this case, the whole modifier list is spliced
  181. # into the new location.
  182. def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node:
  183. """Tacks a type modifier on a declarator, and returns
  184. the modified declarator.
  185. Note: the declarator and modifier may be modified
  186. """
  187. modifier_head = modifier
  188. modifier_tail = modifier
  189. # The modifier may be a nested list. Reach its tail.
  190. while modifier_tail.type:
  191. modifier_tail = modifier_tail.type
  192. # If the decl is a basic type, just tack the modifier onto it.
  193. if isinstance(decl, c_ast.TypeDecl):
  194. modifier_tail.type = decl
  195. return modifier
  196. else:
  197. # Otherwise, the decl is a list of modifiers. Reach
  198. # its tail and splice the modifier onto the tail,
  199. # pointing to the underlying basic type.
  200. decl_tail = decl
  201. while not isinstance(decl_tail.type, c_ast.TypeDecl):
  202. decl_tail = decl_tail.type
  203. modifier_tail.type = decl_tail.type
  204. decl_tail.type = modifier_head
  205. return decl
  206. # Due to the order in which declarators are constructed,
  207. # they have to be fixed in order to look like a normal AST.
  208. #
  209. # When a declaration arrives from syntax construction, it has
  210. # these problems:
  211. # * The innermost TypeDecl has no type (because the basic
  212. # type is only known at the uppermost declaration level)
  213. # * The declaration has no variable name, since that is saved
  214. # in the innermost TypeDecl
  215. # * The typename of the declaration is a list of type
  216. # specifiers, and not a node. Here, basic identifier types
  217. # should be separated from more complex types like enums
  218. # and structs.
  219. #
  220. # This method fixes these problems.
  221. def _fix_decl_name_type(
  222. self,
  223. decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename,
  224. typename: List[Any],
  225. ) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename:
  226. """Fixes a declaration. Modifies decl."""
  227. # Reach the underlying basic type
  228. typ = decl
  229. while not isinstance(typ, c_ast.TypeDecl):
  230. typ = typ.type
  231. decl.name = typ.declname
  232. typ.quals = decl.quals[:]
  233. # The typename is a list of types. If any type in this
  234. # list isn't an IdentifierType, it must be the only
  235. # type in the list (it's illegal to declare "int enum ..")
  236. # If all the types are basic, they're collected in the
  237. # IdentifierType holder.
  238. for tn in typename:
  239. if not isinstance(tn, c_ast.IdentifierType):
  240. if len(typename) > 1:
  241. self._parse_error("Invalid multiple types specified", tn.coord)
  242. else:
  243. typ.type = tn
  244. return decl
  245. if not typename:
  246. # Functions default to returning int
  247. if not isinstance(decl.type, c_ast.FuncDecl):
  248. self._parse_error("Missing type in declaration", decl.coord)
  249. typ.type = c_ast.IdentifierType(["int"], coord=decl.coord)
  250. else:
  251. # At this point, we know that typename is a list of IdentifierType
  252. # nodes. Concatenate all the names into a single list.
  253. typ.type = c_ast.IdentifierType(
  254. [name for id in typename for name in id.names], coord=typename[0].coord
  255. )
  256. return decl
  257. def _add_declaration_specifier(
  258. self,
  259. declspec: Optional["_DeclSpec"],
  260. newspec: Any,
  261. kind: "_DeclSpecKind",
  262. append: bool = False,
  263. ) -> "_DeclSpec":
  264. """See _DeclSpec for the specifier dictionary layout."""
  265. if declspec is None:
  266. spec: _DeclSpec = dict(
  267. qual=[], storage=[], type=[], function=[], alignment=[]
  268. )
  269. else:
  270. spec = declspec
  271. if append:
  272. spec[kind].append(newspec)
  273. else:
  274. spec[kind].insert(0, newspec)
  275. return spec
  276. def _build_declarations(
  277. self,
  278. spec: "_DeclSpec",
  279. decls: List["_DeclInfo"],
  280. typedef_namespace: bool = False,
  281. ) -> List[c_ast.Node]:
  282. """Builds a list of declarations all sharing the given specifiers.
  283. If typedef_namespace is true, each declared name is added
  284. to the "typedef namespace", which also includes objects,
  285. functions, and enum constants.
  286. """
  287. is_typedef = "typedef" in spec["storage"]
  288. declarations = []
  289. # Bit-fields are allowed to be unnamed.
  290. if decls[0].get("bitsize") is None:
  291. # When redeclaring typedef names as identifiers in inner scopes, a
  292. # problem can occur where the identifier gets grouped into
  293. # spec['type'], leaving decl as None. This can only occur for the
  294. # first declarator.
  295. if decls[0]["decl"] is None:
  296. if (
  297. len(spec["type"]) < 2
  298. or len(spec["type"][-1].names) != 1
  299. or not self._is_type_in_scope(spec["type"][-1].names[0])
  300. ):
  301. coord = "?"
  302. for t in spec["type"]:
  303. if hasattr(t, "coord"):
  304. coord = t.coord
  305. break
  306. self._parse_error("Invalid declaration", coord)
  307. # Make this look as if it came from "direct_declarator:ID"
  308. decls[0]["decl"] = c_ast.TypeDecl(
  309. declname=spec["type"][-1].names[0],
  310. type=None,
  311. quals=None,
  312. align=spec["alignment"],
  313. coord=spec["type"][-1].coord,
  314. )
  315. # Remove the "new" type's name from the end of spec['type']
  316. del spec["type"][-1]
  317. # A similar problem can occur where the declaration ends up
  318. # looking like an abstract declarator. Give it a name if this is
  319. # the case.
  320. elif not isinstance(
  321. decls[0]["decl"],
  322. (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
  323. ):
  324. decls_0_tail = cast(Any, decls[0]["decl"])
  325. while not isinstance(decls_0_tail, c_ast.TypeDecl):
  326. decls_0_tail = decls_0_tail.type
  327. if decls_0_tail.declname is None:
  328. decls_0_tail.declname = spec["type"][-1].names[0]
  329. del spec["type"][-1]
  330. for decl in decls:
  331. assert decl["decl"] is not None
  332. if is_typedef:
  333. declaration = c_ast.Typedef(
  334. name=None,
  335. quals=spec["qual"],
  336. storage=spec["storage"],
  337. type=decl["decl"],
  338. coord=decl["decl"].coord,
  339. )
  340. else:
  341. declaration = c_ast.Decl(
  342. name=None,
  343. quals=spec["qual"],
  344. align=spec["alignment"],
  345. storage=spec["storage"],
  346. funcspec=spec["function"],
  347. type=decl["decl"],
  348. init=decl.get("init"),
  349. bitsize=decl.get("bitsize"),
  350. coord=decl["decl"].coord,
  351. )
  352. if isinstance(
  353. declaration.type,
  354. (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
  355. ):
  356. fixed_decl = declaration
  357. else:
  358. fixed_decl = self._fix_decl_name_type(declaration, spec["type"])
  359. # Add the type name defined by typedef to a
  360. # symbol table (for usage in the lexer)
  361. if typedef_namespace:
  362. if is_typedef:
  363. self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
  364. else:
  365. self._add_identifier(fixed_decl.name, fixed_decl.coord)
  366. fixed_decl = fix_atomic_specifiers(
  367. cast(c_ast.Decl | c_ast.Typedef, fixed_decl)
  368. )
  369. declarations.append(fixed_decl)
  370. return declarations
  371. def _build_function_definition(
  372. self,
  373. spec: "_DeclSpec",
  374. decl: c_ast.Node,
  375. param_decls: Optional[List[c_ast.Node]],
  376. body: c_ast.Node,
  377. ) -> c_ast.Node:
  378. """Builds a function definition."""
  379. if "typedef" in spec["storage"]:
  380. self._parse_error("Invalid typedef", decl.coord)
  381. declaration = self._build_declarations(
  382. spec=spec,
  383. decls=[dict(decl=decl, init=None, bitsize=None)],
  384. typedef_namespace=True,
  385. )[0]
  386. return c_ast.FuncDef(
  387. decl=declaration, param_decls=param_decls, body=body, coord=decl.coord
  388. )
  389. def _select_struct_union_class(self, token: str) -> type:
  390. """Given a token (either STRUCT or UNION), selects the
  391. appropriate AST class.
  392. """
  393. if token == "struct":
  394. return c_ast.Struct
  395. else:
  396. return c_ast.Union
  397. # ------------------------------------------------------------------
  398. # Token helpers
  399. # ------------------------------------------------------------------
  400. def _peek(self, k: int = 1) -> Optional[_Token]:
  401. """Return the k-th next token without consuming it (1-based)."""
  402. return self._tokens.peek(k)
  403. def _peek_type(self, k: int = 1) -> Optional[str]:
  404. """Return the type of the k-th next token, or None if absent (1-based)."""
  405. tok = self._peek(k)
  406. return tok.type if tok is not None else None
  407. def _advance(self) -> _Token:
  408. tok = self._tokens.next()
  409. if tok is None:
  410. self._parse_error("At end of input", self.clex.filename)
  411. else:
  412. return tok
  413. def _accept(self, token_type: str) -> Optional[_Token]:
  414. """Conditionally consume next token, only if it's of token_type.
  415. If it is of the expected type, consume and return it.
  416. Otherwise, leaves the token intact and returns None.
  417. """
  418. tok = self._peek()
  419. if tok is not None and tok.type == token_type:
  420. return self._advance()
  421. return None
  422. def _expect(self, token_type: str) -> _Token:
  423. tok = self._advance()
  424. if tok.type != token_type:
  425. self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
  426. return tok
  427. def _mark(self) -> int:
  428. return self._tokens.mark()
  429. def _reset(self, mark: int) -> None:
  430. self._tokens.reset(mark)
  431. def _tok_coord(self, tok: _Token) -> Coord:
  432. return self._coord(tok.lineno, tok.column)
  433. def _starts_declaration(self, tok: Optional[_Token] = None) -> bool:
  434. tok = tok or self._peek()
  435. if tok is None:
  436. return False
  437. return tok.type in _DECL_START
  438. def _starts_expression(self, tok: Optional[_Token] = None) -> bool:
  439. tok = tok or self._peek()
  440. if tok is None:
  441. return False
  442. return tok.type in _STARTS_EXPRESSION
  443. def _starts_statement(self) -> bool:
  444. tok_type = self._peek_type()
  445. if tok_type is None:
  446. return False
  447. if tok_type in _STARTS_STATEMENT:
  448. return True
  449. return self._starts_expression()
  450. def _starts_declarator(self, id_only: bool = False) -> bool:
  451. tok_type = self._peek_type()
  452. if tok_type is None:
  453. return False
  454. if tok_type in {"TIMES", "LPAREN"}:
  455. return True
  456. if id_only:
  457. return tok_type == "ID"
  458. return tok_type in {"ID", "TYPEID"}
  459. def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]:
  460. mark = self._mark()
  461. tok_type, saw_paren = self._scan_declarator_name_info()
  462. self._reset(mark)
  463. return tok_type, saw_paren
  464. def _parse_any_declarator(
  465. self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False
  466. ) -> Tuple[Optional[c_ast.Node], bool]:
  467. # C declarators are ambiguous without lookahead. For example:
  468. # int foo(int (aa)); -> aa is a name (ID)
  469. # typedef char TT;
  470. # int bar(int (TT)); -> TT is a type (TYPEID) in parens
  471. name_type, saw_paren = self._peek_declarator_name_info()
  472. if name_type is None or (
  473. typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren
  474. ):
  475. if not allow_abstract:
  476. tok = self._peek()
  477. coord = self._tok_coord(tok) if tok is not None else self.clex.filename
  478. self._parse_error("Invalid declarator", coord)
  479. decl = self._parse_abstract_declarator_opt()
  480. return decl, False
  481. if name_type == "TYPEID":
  482. if typeid_paren_as_abstract:
  483. decl = self._parse_typeid_noparen_declarator()
  484. else:
  485. decl = self._parse_typeid_declarator()
  486. else:
  487. decl = self._parse_id_declarator()
  488. return decl, True
  489. def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]:
  490. saw_paren = False
  491. while self._accept("TIMES"):
  492. while self._peek_type() in _TYPE_QUALIFIER:
  493. self._advance()
  494. tok = self._peek()
  495. if tok is None:
  496. return None, saw_paren
  497. if tok.type in {"ID", "TYPEID"}:
  498. self._advance()
  499. return tok.type, saw_paren
  500. if tok.type == "LPAREN":
  501. saw_paren = True
  502. self._advance()
  503. tok_type, nested_paren = self._scan_declarator_name_info()
  504. if nested_paren:
  505. saw_paren = True
  506. depth = 1
  507. while True:
  508. tok = self._peek()
  509. if tok is None:
  510. return None, saw_paren
  511. if tok.type == "LPAREN":
  512. depth += 1
  513. elif tok.type == "RPAREN":
  514. depth -= 1
  515. self._advance()
  516. if depth == 0:
  517. break
  518. continue
  519. self._advance()
  520. return tok_type, saw_paren
  521. return None, saw_paren
  522. def _starts_direct_abstract_declarator(self) -> bool:
  523. return self._peek_type() in {"LPAREN", "LBRACKET"}
  524. def _is_assignment_op(self) -> bool:
  525. tok = self._peek()
  526. return tok is not None and tok.type in _ASSIGNMENT_OPS
  527. def _try_parse_paren_type_name(
  528. self,
  529. ) -> Optional[Tuple[c_ast.Typename, int, _Token]]:
  530. """Parse and return a parenthesized type name if present.
  531. Returns (typ, mark, lparen_tok) when the next tokens look like
  532. '(' type_name ')', where typ is the parsed type name, mark is the
  533. token-stream position before parsing, and lparen_tok is the LPAREN
  534. token. Returns None if no parenthesized type name is present.
  535. """
  536. mark = self._mark()
  537. lparen_tok = self._accept("LPAREN")
  538. if lparen_tok is None:
  539. return None
  540. if not self._starts_declaration():
  541. self._reset(mark)
  542. return None
  543. typ = self._parse_type_name()
  544. if self._accept("RPAREN") is None:
  545. self._reset(mark)
  546. return None
  547. return typ, mark, lparen_tok
  548. # ------------------------------------------------------------------
  549. # Top-level
  550. # ------------------------------------------------------------------
  551. # BNF: translation_unit_or_empty : translation_unit | empty
  552. def _parse_translation_unit_or_empty(self) -> c_ast.FileAST:
  553. if self._peek() is None:
  554. return c_ast.FileAST([])
  555. return c_ast.FileAST(self._parse_translation_unit())
  556. # BNF: translation_unit : external_declaration+
  557. def _parse_translation_unit(self) -> List[c_ast.Node]:
  558. ext = []
  559. while self._peek() is not None:
  560. ext.extend(self._parse_external_declaration())
  561. return ext
  562. # BNF: external_declaration : function_definition
  563. # | declaration
  564. # | pp_directive
  565. # | pppragma_directive
  566. # | static_assert
  567. # | ';'
  568. def _parse_external_declaration(self) -> List[c_ast.Node]:
  569. tok = self._peek()
  570. if tok is None:
  571. return []
  572. if tok.type == "PPHASH":
  573. self._parse_pp_directive()
  574. return []
  575. if tok.type in {"PPPRAGMA", "_PRAGMA"}:
  576. return [self._parse_pppragma_directive()]
  577. if self._accept("SEMI"):
  578. return []
  579. if tok.type == "_STATIC_ASSERT":
  580. return self._parse_static_assert()
  581. if not self._starts_declaration(tok):
  582. # Special handling for old-style function definitions that have an
  583. # implicit return type, e.g.
  584. #
  585. # foo() {
  586. # return 5;
  587. # }
  588. #
  589. # These get an implicit 'int' return type.
  590. decl = self._parse_id_declarator()
  591. param_decls = None
  592. if self._peek_type() != "LBRACE":
  593. self._parse_error("Invalid function definition", decl.coord)
  594. spec: _DeclSpec = dict(
  595. qual=[],
  596. alignment=[],
  597. storage=[],
  598. type=[c_ast.IdentifierType(["int"], coord=decl.coord)],
  599. function=[],
  600. )
  601. func = self._build_function_definition(
  602. spec=spec,
  603. decl=decl,
  604. param_decls=param_decls,
  605. body=self._parse_compound_statement(),
  606. )
  607. return [func]
  608. # From here on, parsing a standard declatation/definition.
  609. spec, saw_type, spec_coord = self._parse_declaration_specifiers(
  610. allow_no_type=True
  611. )
  612. name_type, _ = self._peek_declarator_name_info()
  613. if name_type != "ID":
  614. decls = self._parse_decl_body_with_spec(spec, saw_type)
  615. self._expect("SEMI")
  616. return decls
  617. decl = self._parse_id_declarator()
  618. if self._peek_type() == "LBRACE" or self._starts_declaration():
  619. param_decls = None
  620. if self._starts_declaration():
  621. param_decls = self._parse_declaration_list()
  622. if self._peek_type() != "LBRACE":
  623. self._parse_error("Invalid function definition", decl.coord)
  624. if not spec["type"]:
  625. spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
  626. func = self._build_function_definition(
  627. spec=spec,
  628. decl=decl,
  629. param_decls=param_decls,
  630. body=self._parse_compound_statement(),
  631. )
  632. return [func]
  633. decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None)
  634. if self._accept("EQUALS"):
  635. decl_dict["init"] = self._parse_initializer()
  636. decls = self._parse_init_declarator_list(first=decl_dict)
  637. decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True)
  638. self._expect("SEMI")
  639. return decls
  640. # ------------------------------------------------------------------
  641. # Declarations
  642. #
  643. # Declarations always come as lists (because they can be several in one
  644. # line). When returning parsed declarations, a list is always returned -
  645. # even if it contains a single element.
  646. # ------------------------------------------------------------------
  647. def _parse_declaration(self) -> List[c_ast.Node]:
  648. decls = self._parse_decl_body()
  649. self._expect("SEMI")
  650. return decls
  651. # BNF: decl_body : declaration_specifiers decl_body_with_spec
  652. def _parse_decl_body(self) -> List[c_ast.Node]:
  653. spec, saw_type, _ = self._parse_declaration_specifiers(allow_no_type=True)
  654. return self._parse_decl_body_with_spec(spec, saw_type)
  655. # BNF: decl_body_with_spec : init_declarator_list
  656. # | struct_or_union_or_enum_only
  657. def _parse_decl_body_with_spec(
  658. self, spec: "_DeclSpec", saw_type: bool
  659. ) -> List[c_ast.Node]:
  660. decls = None
  661. if saw_type:
  662. if self._starts_declarator():
  663. decls = self._parse_init_declarator_list()
  664. else:
  665. if self._starts_declarator(id_only=True):
  666. decls = self._parse_init_declarator_list(id_only=True)
  667. if decls is None:
  668. ty = spec["type"]
  669. s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
  670. if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
  671. decls = [
  672. c_ast.Decl(
  673. name=None,
  674. quals=spec["qual"],
  675. align=spec["alignment"],
  676. storage=spec["storage"],
  677. funcspec=spec["function"],
  678. type=ty[0],
  679. init=None,
  680. bitsize=None,
  681. coord=ty[0].coord,
  682. )
  683. ]
  684. else:
  685. decls = self._build_declarations(
  686. spec=spec,
  687. decls=[dict(decl=None, init=None, bitsize=None)],
  688. typedef_namespace=True,
  689. )
  690. else:
  691. decls = self._build_declarations(
  692. spec=spec, decls=decls, typedef_namespace=True
  693. )
  694. return decls
  695. # BNF: declaration_list : declaration+
  696. def _parse_declaration_list(self) -> List[c_ast.Node]:
  697. decls = []
  698. while self._starts_declaration():
  699. decls.extend(self._parse_declaration())
  700. return decls
  701. # BNF: declaration_specifiers : (storage_class_specifier
  702. # | type_specifier
  703. # | type_qualifier
  704. # | function_specifier
  705. # | alignment_specifier)+
  706. def _parse_declaration_specifiers(
  707. self, allow_no_type: bool = False
  708. ) -> Tuple["_DeclSpec", bool, Optional[Coord]]:
  709. """Parse declaration-specifier sequence.
  710. allow_no_type:
  711. If True, allow a missing type specifier without error.
  712. Returns:
  713. (spec, saw_type, first_coord) where spec is a dict with
  714. qual/storage/type/function/alignment entries, saw_type is True
  715. if a type specifier was consumed, and first_coord is the coord
  716. of the first specifier token (used for diagnostics).
  717. """
  718. spec = None
  719. saw_type = False
  720. first_coord = None
  721. while True:
  722. tok = self._peek()
  723. if tok is None:
  724. break
  725. if tok.type == "_ALIGNAS":
  726. if first_coord is None:
  727. first_coord = self._tok_coord(tok)
  728. spec = self._add_declaration_specifier(
  729. spec, self._parse_alignment_specifier(), "alignment", append=True
  730. )
  731. continue
  732. if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
  733. if first_coord is None:
  734. first_coord = self._tok_coord(tok)
  735. spec = self._add_declaration_specifier(
  736. spec, self._parse_atomic_specifier(), "type", append=True
  737. )
  738. saw_type = True
  739. continue
  740. if tok.type in _TYPE_QUALIFIER:
  741. if first_coord is None:
  742. first_coord = self._tok_coord(tok)
  743. spec = self._add_declaration_specifier(
  744. spec, self._advance().value, "qual", append=True
  745. )
  746. continue
  747. if tok.type in _STORAGE_CLASS:
  748. if first_coord is None:
  749. first_coord = self._tok_coord(tok)
  750. spec = self._add_declaration_specifier(
  751. spec, self._advance().value, "storage", append=True
  752. )
  753. continue
  754. if tok.type in _FUNCTION_SPEC:
  755. if first_coord is None:
  756. first_coord = self._tok_coord(tok)
  757. spec = self._add_declaration_specifier(
  758. spec, self._advance().value, "function", append=True
  759. )
  760. continue
  761. if tok.type in _TYPE_SPEC_SIMPLE:
  762. if first_coord is None:
  763. first_coord = self._tok_coord(tok)
  764. tok = self._advance()
  765. spec = self._add_declaration_specifier(
  766. spec,
  767. c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
  768. "type",
  769. append=True,
  770. )
  771. saw_type = True
  772. continue
  773. if tok.type == "TYPEID":
  774. if saw_type:
  775. break
  776. if first_coord is None:
  777. first_coord = self._tok_coord(tok)
  778. tok = self._advance()
  779. spec = self._add_declaration_specifier(
  780. spec,
  781. c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
  782. "type",
  783. append=True,
  784. )
  785. saw_type = True
  786. continue
  787. if tok.type in {"STRUCT", "UNION"}:
  788. if first_coord is None:
  789. first_coord = self._tok_coord(tok)
  790. spec = self._add_declaration_specifier(
  791. spec, self._parse_struct_or_union_specifier(), "type", append=True
  792. )
  793. saw_type = True
  794. continue
  795. if tok.type == "ENUM":
  796. if first_coord is None:
  797. first_coord = self._tok_coord(tok)
  798. spec = self._add_declaration_specifier(
  799. spec, self._parse_enum_specifier(), "type", append=True
  800. )
  801. saw_type = True
  802. continue
  803. break
  804. if spec is None:
  805. self._parse_error("Invalid declaration", self.clex.filename)
  806. if not saw_type and not allow_no_type:
  807. self._parse_error("Missing type in declaration", first_coord)
  808. return spec, saw_type, first_coord
  809. # BNF: specifier_qualifier_list : (type_specifier
  810. # | type_qualifier
  811. # | alignment_specifier)+
  812. def _parse_specifier_qualifier_list(self) -> "_DeclSpec":
  813. spec = None
  814. saw_type = False
  815. saw_alignment = False
  816. first_coord = None
  817. while True:
  818. tok = self._peek()
  819. if tok is None:
  820. break
  821. if tok.type == "_ALIGNAS":
  822. if first_coord is None:
  823. first_coord = self._tok_coord(tok)
  824. spec = self._add_declaration_specifier(
  825. spec, self._parse_alignment_specifier(), "alignment", append=True
  826. )
  827. saw_alignment = True
  828. continue
  829. if tok.type == "_ATOMIC" and self._peek_type(2) == "LPAREN":
  830. if first_coord is None:
  831. first_coord = self._tok_coord(tok)
  832. spec = self._add_declaration_specifier(
  833. spec, self._parse_atomic_specifier(), "type", append=True
  834. )
  835. saw_type = True
  836. continue
  837. if tok.type in _TYPE_QUALIFIER:
  838. if first_coord is None:
  839. first_coord = self._tok_coord(tok)
  840. spec = self._add_declaration_specifier(
  841. spec, self._advance().value, "qual", append=True
  842. )
  843. continue
  844. if tok.type in _TYPE_SPEC_SIMPLE:
  845. if first_coord is None:
  846. first_coord = self._tok_coord(tok)
  847. tok = self._advance()
  848. spec = self._add_declaration_specifier(
  849. spec,
  850. c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
  851. "type",
  852. append=True,
  853. )
  854. saw_type = True
  855. continue
  856. if tok.type == "TYPEID":
  857. if saw_type:
  858. break
  859. if first_coord is None:
  860. first_coord = self._tok_coord(tok)
  861. tok = self._advance()
  862. spec = self._add_declaration_specifier(
  863. spec,
  864. c_ast.IdentifierType([tok.value], coord=self._tok_coord(tok)),
  865. "type",
  866. append=True,
  867. )
  868. saw_type = True
  869. continue
  870. if tok.type in {"STRUCT", "UNION"}:
  871. if first_coord is None:
  872. first_coord = self._tok_coord(tok)
  873. spec = self._add_declaration_specifier(
  874. spec, self._parse_struct_or_union_specifier(), "type", append=True
  875. )
  876. saw_type = True
  877. continue
  878. if tok.type == "ENUM":
  879. if first_coord is None:
  880. first_coord = self._tok_coord(tok)
  881. spec = self._add_declaration_specifier(
  882. spec, self._parse_enum_specifier(), "type", append=True
  883. )
  884. saw_type = True
  885. continue
  886. break
  887. if spec is None:
  888. self._parse_error("Invalid specifier list", self.clex.filename)
  889. if not saw_type and not saw_alignment:
  890. self._parse_error("Missing type in declaration", first_coord)
  891. if spec.get("storage") is None:
  892. spec["storage"] = []
  893. if spec.get("function") is None:
  894. spec["function"] = []
  895. return spec
  896. # BNF: type_qualifier_list : type_qualifier+
  897. def _parse_type_qualifier_list(self) -> List[str]:
  898. quals = []
  899. while self._peek_type() in _TYPE_QUALIFIER:
  900. quals.append(self._advance().value)
  901. return quals
  902. # BNF: alignment_specifier : _ALIGNAS '(' type_name | constant_expression ')'
  903. def _parse_alignment_specifier(self) -> c_ast.Node:
  904. tok = self._expect("_ALIGNAS")
  905. self._expect("LPAREN")
  906. if self._starts_declaration():
  907. typ = self._parse_type_name()
  908. self._expect("RPAREN")
  909. return c_ast.Alignas(typ, self._tok_coord(tok))
  910. expr = self._parse_constant_expression()
  911. self._expect("RPAREN")
  912. return c_ast.Alignas(expr, self._tok_coord(tok))
  913. # BNF: atomic_specifier : _ATOMIC '(' type_name ')'
  914. def _parse_atomic_specifier(self) -> c_ast.Node:
  915. self._expect("_ATOMIC")
  916. self._expect("LPAREN")
  917. typ = self._parse_type_name()
  918. self._expect("RPAREN")
  919. typ.quals.append("_Atomic")
  920. return typ
  921. # BNF: init_declarator_list : init_declarator (',' init_declarator)*
  922. def _parse_init_declarator_list(
  923. self, first: Optional["_DeclInfo"] = None, id_only: bool = False
  924. ) -> List["_DeclInfo"]:
  925. decls = (
  926. [first]
  927. if first is not None
  928. else [self._parse_init_declarator(id_only=id_only)]
  929. )
  930. while self._accept("COMMA"):
  931. decls.append(self._parse_init_declarator(id_only=id_only))
  932. return decls
  933. # BNF: init_declarator : declarator ('=' initializer)?
  934. def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo":
  935. decl = self._parse_id_declarator() if id_only else self._parse_declarator()
  936. init = None
  937. if self._accept("EQUALS"):
  938. init = self._parse_initializer()
  939. return dict(decl=decl, init=init, bitsize=None)
  940. # ------------------------------------------------------------------
  941. # Structs/unions/enums
  942. # ------------------------------------------------------------------
  943. # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}'
  944. # | struct_or_union ID
  945. def _parse_struct_or_union_specifier(self) -> c_ast.Node:
  946. tok = self._advance()
  947. klass = self._select_struct_union_class(tok.value)
  948. if self._peek_type() in {"ID", "TYPEID"}:
  949. name_tok = self._advance()
  950. if self._peek_type() == "LBRACE":
  951. self._advance()
  952. if self._accept("RBRACE"):
  953. return klass(
  954. name=name_tok.value, decls=[], coord=self._tok_coord(name_tok)
  955. )
  956. decls = self._parse_struct_declaration_list()
  957. self._expect("RBRACE")
  958. return klass(
  959. name=name_tok.value, decls=decls, coord=self._tok_coord(name_tok)
  960. )
  961. return klass(
  962. name=name_tok.value, decls=None, coord=self._tok_coord(name_tok)
  963. )
  964. if self._peek_type() == "LBRACE":
  965. brace_tok = self._advance()
  966. if self._accept("RBRACE"):
  967. return klass(name=None, decls=[], coord=self._tok_coord(brace_tok))
  968. decls = self._parse_struct_declaration_list()
  969. self._expect("RBRACE")
  970. return klass(name=None, decls=decls, coord=self._tok_coord(brace_tok))
  971. self._parse_error("Invalid struct/union declaration", self._tok_coord(tok))
  972. # BNF: struct_declaration_list : struct_declaration+
  973. def _parse_struct_declaration_list(self) -> List[c_ast.Node]:
  974. decls = []
  975. while self._peek_type() not in {None, "RBRACE"}:
  976. items = self._parse_struct_declaration()
  977. if items is None:
  978. continue
  979. decls.extend(items)
  980. return decls
  981. # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';'
  982. # | static_assert
  983. # | pppragma_directive
  984. def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]:
  985. if self._peek_type() == "SEMI":
  986. self._advance()
  987. return None
  988. if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
  989. return [self._parse_pppragma_directive()]
  990. spec = self._parse_specifier_qualifier_list()
  991. assert "typedef" not in spec.get("storage", [])
  992. decls = None
  993. if self._starts_declarator() or self._peek_type() == "COLON":
  994. decls = self._parse_struct_declarator_list()
  995. if decls is not None:
  996. self._expect("SEMI")
  997. return self._build_declarations(spec=spec, decls=decls)
  998. if len(spec["type"]) == 1:
  999. node = spec["type"][0]
  1000. if isinstance(node, c_ast.Node):
  1001. decl_type = node
  1002. else:
  1003. decl_type = c_ast.IdentifierType(node)
  1004. self._expect("SEMI")
  1005. return self._build_declarations(
  1006. spec=spec, decls=[dict(decl=decl_type, init=None, bitsize=None)]
  1007. )
  1008. self._expect("SEMI")
  1009. return self._build_declarations(
  1010. spec=spec, decls=[dict(decl=None, init=None, bitsize=None)]
  1011. )
  1012. # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)*
  1013. def _parse_struct_declarator_list(self) -> List["_DeclInfo"]:
  1014. decls = [self._parse_struct_declarator()]
  1015. while self._accept("COMMA"):
  1016. decls.append(self._parse_struct_declarator())
  1017. return decls
  1018. # BNF: struct_declarator : declarator? ':' constant_expression
  1019. # | declarator (':' constant_expression)?
  1020. def _parse_struct_declarator(self) -> "_DeclInfo":
  1021. if self._accept("COLON"):
  1022. bitsize = self._parse_constant_expression()
  1023. return {
  1024. "decl": c_ast.TypeDecl(None, None, None, None),
  1025. "init": None,
  1026. "bitsize": bitsize,
  1027. }
  1028. decl = self._parse_declarator()
  1029. if self._accept("COLON"):
  1030. bitsize = self._parse_constant_expression()
  1031. return {"decl": decl, "init": None, "bitsize": bitsize}
  1032. return {"decl": decl, "init": None, "bitsize": None}
  1033. # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}'
  1034. # | ENUM ID
  1035. def _parse_enum_specifier(self) -> c_ast.Node:
  1036. tok = self._expect("ENUM")
  1037. if self._peek_type() in {"ID", "TYPEID"}:
  1038. name_tok = self._advance()
  1039. if self._peek_type() == "LBRACE":
  1040. self._advance()
  1041. enums = self._parse_enumerator_list()
  1042. self._expect("RBRACE")
  1043. return c_ast.Enum(name_tok.value, enums, self._tok_coord(tok))
  1044. return c_ast.Enum(name_tok.value, None, self._tok_coord(tok))
  1045. self._expect("LBRACE")
  1046. enums = self._parse_enumerator_list()
  1047. self._expect("RBRACE")
  1048. return c_ast.Enum(None, enums, self._tok_coord(tok))
  1049. # BNF: enumerator_list : enumerator (',' enumerator)* ','?
  1050. def _parse_enumerator_list(self) -> c_ast.Node:
  1051. enum = self._parse_enumerator()
  1052. enum_list = c_ast.EnumeratorList([enum], enum.coord)
  1053. while self._accept("COMMA"):
  1054. if self._peek_type() == "RBRACE":
  1055. break
  1056. enum = self._parse_enumerator()
  1057. enum_list.enumerators.append(enum)
  1058. return enum_list
  1059. # BNF: enumerator : ID ('=' constant_expression)?
  1060. def _parse_enumerator(self) -> c_ast.Node:
  1061. name_tok = self._expect("ID")
  1062. if self._accept("EQUALS"):
  1063. value = self._parse_constant_expression()
  1064. else:
  1065. value = None
  1066. enum = c_ast.Enumerator(name_tok.value, value, self._tok_coord(name_tok))
  1067. self._add_identifier(enum.name, enum.coord)
  1068. return enum
  1069. # ------------------------------------------------------------------
  1070. # Declarators
  1071. # ------------------------------------------------------------------
  1072. # BNF: declarator : pointer? direct_declarator
  1073. def _parse_declarator(self) -> c_ast.Node:
  1074. decl, _ = self._parse_any_declarator(
  1075. allow_abstract=False, typeid_paren_as_abstract=False
  1076. )
  1077. assert decl is not None
  1078. return decl
  1079. # BNF: id_declarator : declarator with ID name
  1080. def _parse_id_declarator(self) -> c_ast.Node:
  1081. return self._parse_declarator_kind(kind="id", allow_paren=True)
  1082. # BNF: typeid_declarator : declarator with TYPEID name
  1083. def _parse_typeid_declarator(self) -> c_ast.Node:
  1084. return self._parse_declarator_kind(kind="typeid", allow_paren=True)
  1085. # BNF: typeid_noparen_declarator : declarator without parenthesized name
  1086. def _parse_typeid_noparen_declarator(self) -> c_ast.Node:
  1087. return self._parse_declarator_kind(kind="typeid", allow_paren=False)
  1088. # BNF: declarator_kind : pointer? direct_declarator(kind)
  1089. def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node:
  1090. ptr = None
  1091. if self._peek_type() == "TIMES":
  1092. ptr = self._parse_pointer()
  1093. direct = self._parse_direct_declarator(kind, allow_paren=allow_paren)
  1094. if ptr is not None:
  1095. return self._type_modify_decl(direct, ptr)
  1096. return direct
  1097. # BNF: direct_declarator : ID | TYPEID | '(' declarator ')'
  1098. # | direct_declarator '[' ... ']'
  1099. # | direct_declarator '(' ... ')'
  1100. def _parse_direct_declarator(
  1101. self, kind: str, allow_paren: bool = True
  1102. ) -> c_ast.Node:
  1103. if allow_paren and self._accept("LPAREN"):
  1104. decl = self._parse_declarator_kind(kind, allow_paren=True)
  1105. self._expect("RPAREN")
  1106. else:
  1107. if kind == "id":
  1108. name_tok = self._expect("ID")
  1109. else:
  1110. name_tok = self._expect("TYPEID")
  1111. decl = c_ast.TypeDecl(
  1112. declname=name_tok.value,
  1113. type=None,
  1114. quals=None,
  1115. align=None,
  1116. coord=self._tok_coord(name_tok),
  1117. )
  1118. return self._parse_decl_suffixes(decl)
  1119. def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node:
  1120. """Parse a chain of array/function suffixes and attach them to decl."""
  1121. while True:
  1122. if self._peek_type() == "LBRACKET":
  1123. decl = self._type_modify_decl(decl, self._parse_array_decl(decl))
  1124. continue
  1125. if self._peek_type() == "LPAREN":
  1126. func = self._parse_function_decl(decl)
  1127. decl = self._type_modify_decl(decl, func)
  1128. continue
  1129. break
  1130. return decl
  1131. # BNF: array_decl : '[' array_specifiers? assignment_expression? ']'
  1132. def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
  1133. return self._parse_array_decl_common(base_type=None, coord=base_decl.coord)
  1134. def _parse_array_decl_common(
  1135. self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None
  1136. ) -> c_ast.Node:
  1137. """Parse an array declarator suffix and return an ArrayDecl node.
  1138. base_type:
  1139. Base declarator node to attach (None for direct-declarator parsing,
  1140. TypeDecl for abstract declarators).
  1141. coord:
  1142. Coordinate to use for the ArrayDecl. If None, uses the '[' token.
  1143. """
  1144. lbrack_tok = self._expect("LBRACKET")
  1145. if coord is None:
  1146. coord = self._tok_coord(lbrack_tok)
  1147. def make_array_decl(dim, dim_quals):
  1148. return c_ast.ArrayDecl(
  1149. type=base_type, dim=dim, dim_quals=dim_quals, coord=coord
  1150. )
  1151. if self._accept("STATIC"):
  1152. dim_quals = ["static"] + (self._parse_type_qualifier_list() or [])
  1153. dim = self._parse_assignment_expression()
  1154. self._expect("RBRACKET")
  1155. return make_array_decl(dim, dim_quals)
  1156. if self._peek_type() in _TYPE_QUALIFIER:
  1157. dim_quals = self._parse_type_qualifier_list() or []
  1158. if self._accept("STATIC"):
  1159. dim_quals = dim_quals + ["static"]
  1160. dim = self._parse_assignment_expression()
  1161. self._expect("RBRACKET")
  1162. return make_array_decl(dim, dim_quals)
  1163. times_tok = self._accept("TIMES")
  1164. if times_tok:
  1165. self._expect("RBRACKET")
  1166. dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
  1167. return make_array_decl(dim, dim_quals)
  1168. dim = None
  1169. if self._starts_expression():
  1170. dim = self._parse_assignment_expression()
  1171. self._expect("RBRACKET")
  1172. return make_array_decl(dim, dim_quals)
  1173. times_tok = self._accept("TIMES")
  1174. if times_tok:
  1175. self._expect("RBRACKET")
  1176. dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
  1177. return make_array_decl(dim, [])
  1178. dim = None
  1179. if self._starts_expression():
  1180. dim = self._parse_assignment_expression()
  1181. self._expect("RBRACKET")
  1182. return make_array_decl(dim, [])
  1183. # BNF: function_decl : '(' parameter_type_list_opt | identifier_list_opt ')'
  1184. def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
  1185. self._expect("LPAREN")
  1186. if self._accept("RPAREN"):
  1187. args = None
  1188. else:
  1189. args = (
  1190. self._parse_parameter_type_list()
  1191. if self._starts_declaration()
  1192. else self._parse_identifier_list_opt()
  1193. )
  1194. self._expect("RPAREN")
  1195. func = c_ast.FuncDecl(args=args, type=None, coord=base_decl.coord)
  1196. if self._peek_type() == "LBRACE":
  1197. if func.args is not None:
  1198. for param in func.args.params:
  1199. if isinstance(param, c_ast.EllipsisParam):
  1200. break
  1201. name = getattr(param, "name", None)
  1202. if name:
  1203. self._add_identifier(name, param.coord)
  1204. return func
  1205. # BNF: pointer : '*' type_qualifier_list? pointer?
  1206. def _parse_pointer(self) -> Optional[c_ast.Node]:
  1207. stars = []
  1208. times_tok = self._accept("TIMES")
  1209. while times_tok:
  1210. quals = self._parse_type_qualifier_list() or []
  1211. stars.append((quals, self._tok_coord(times_tok)))
  1212. times_tok = self._accept("TIMES")
  1213. if not stars:
  1214. return None
  1215. ptr = None
  1216. for quals, coord in stars:
  1217. ptr = c_ast.PtrDecl(quals=quals, type=ptr, coord=coord)
  1218. return ptr
  1219. # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)?
  1220. def _parse_parameter_type_list(self) -> c_ast.ParamList:
  1221. params = self._parse_parameter_list()
  1222. if self._peek_type() == "COMMA" and self._peek_type(2) == "ELLIPSIS":
  1223. self._advance()
  1224. ell_tok = self._advance()
  1225. params.params.append(c_ast.EllipsisParam(self._tok_coord(ell_tok)))
  1226. return params
  1227. # BNF: parameter_list : parameter_declaration (',' parameter_declaration)*
  1228. def _parse_parameter_list(self) -> c_ast.ParamList:
  1229. first = self._parse_parameter_declaration()
  1230. params = c_ast.ParamList([first], first.coord)
  1231. while self._peek_type() == "COMMA" and self._peek_type(2) != "ELLIPSIS":
  1232. self._advance()
  1233. params.params.append(self._parse_parameter_declaration())
  1234. return params
  1235. # BNF: parameter_declaration : declaration_specifiers declarator?
  1236. # | declaration_specifiers abstract_declarator_opt
  1237. def _parse_parameter_declaration(self) -> c_ast.Node:
  1238. spec, _, spec_coord = self._parse_declaration_specifiers(allow_no_type=True)
  1239. if not spec["type"]:
  1240. spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
  1241. if self._starts_declarator():
  1242. decl, is_named = self._parse_any_declarator(
  1243. allow_abstract=True, typeid_paren_as_abstract=True
  1244. )
  1245. if is_named:
  1246. return self._build_declarations(
  1247. spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
  1248. )[0]
  1249. return self._build_parameter_declaration(spec, decl, spec_coord)
  1250. decl = self._parse_abstract_declarator_opt()
  1251. return self._build_parameter_declaration(spec, decl, spec_coord)
  1252. def _build_parameter_declaration(
  1253. self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord]
  1254. ) -> c_ast.Node:
  1255. if (
  1256. len(spec["type"]) > 1
  1257. and len(spec["type"][-1].names) == 1
  1258. and self._is_type_in_scope(spec["type"][-1].names[0])
  1259. ):
  1260. return self._build_declarations(
  1261. spec=spec, decls=[dict(decl=decl, init=None, bitsize=None)]
  1262. )[0]
  1263. decl = c_ast.Typename(
  1264. name="",
  1265. quals=spec["qual"],
  1266. align=None,
  1267. type=decl or c_ast.TypeDecl(None, None, None, None),
  1268. coord=spec_coord,
  1269. )
  1270. return self._fix_decl_name_type(decl, spec["type"])
  1271. # BNF: identifier_list_opt : identifier_list | empty
  1272. def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]:
  1273. if self._peek_type() == "RPAREN":
  1274. return None
  1275. return self._parse_identifier_list()
  1276. # BNF: identifier_list : identifier (',' identifier)*
  1277. def _parse_identifier_list(self) -> c_ast.Node:
  1278. first = self._parse_identifier()
  1279. params = c_ast.ParamList([first], first.coord)
  1280. while self._accept("COMMA"):
  1281. params.params.append(self._parse_identifier())
  1282. return params
  1283. # ------------------------------------------------------------------
  1284. # Abstract declarators
  1285. # ------------------------------------------------------------------
  1286. # BNF: type_name : specifier_qualifier_list abstract_declarator_opt
  1287. def _parse_type_name(self) -> c_ast.Typename:
  1288. spec = self._parse_specifier_qualifier_list()
  1289. decl = self._parse_abstract_declarator_opt()
  1290. coord = None
  1291. if decl is not None:
  1292. coord = decl.coord
  1293. elif spec["type"]:
  1294. coord = spec["type"][0].coord
  1295. typename = c_ast.Typename(
  1296. name="",
  1297. quals=spec["qual"][:],
  1298. align=None,
  1299. type=decl or c_ast.TypeDecl(None, None, None, None),
  1300. coord=coord,
  1301. )
  1302. return cast(c_ast.Typename, self._fix_decl_name_type(typename, spec["type"]))
  1303. # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator?
  1304. def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]:
  1305. if self._peek_type() == "TIMES":
  1306. ptr = self._parse_pointer()
  1307. if self._starts_direct_abstract_declarator():
  1308. decl = self._parse_direct_abstract_declarator()
  1309. else:
  1310. decl = c_ast.TypeDecl(None, None, None, None)
  1311. assert ptr is not None
  1312. return self._type_modify_decl(decl, ptr)
  1313. if self._starts_direct_abstract_declarator():
  1314. return self._parse_direct_abstract_declarator()
  1315. return None
  1316. # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')'
  1317. # | '(' abstract_declarator ')'
  1318. # | '[' ... ']'
  1319. def _parse_direct_abstract_declarator(self) -> c_ast.Node:
  1320. lparen_tok = self._accept("LPAREN")
  1321. if lparen_tok:
  1322. if self._starts_declaration() or self._peek_type() == "RPAREN":
  1323. params = self._parse_parameter_type_list_opt()
  1324. self._expect("RPAREN")
  1325. decl = c_ast.FuncDecl(
  1326. args=params,
  1327. type=c_ast.TypeDecl(None, None, None, None),
  1328. coord=self._tok_coord(lparen_tok),
  1329. )
  1330. else:
  1331. decl = self._parse_abstract_declarator_opt()
  1332. self._expect("RPAREN")
  1333. assert decl is not None
  1334. elif self._peek_type() == "LBRACKET":
  1335. decl = self._parse_abstract_array_base()
  1336. else:
  1337. self._parse_error("Invalid abstract declarator", self.clex.filename)
  1338. return self._parse_decl_suffixes(decl)
  1339. # BNF: parameter_type_list_opt : parameter_type_list | empty
  1340. def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]:
  1341. if self._peek_type() == "RPAREN":
  1342. return None
  1343. return self._parse_parameter_type_list()
  1344. # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']'
  1345. def _parse_abstract_array_base(self) -> c_ast.Node:
  1346. return self._parse_array_decl_common(
  1347. base_type=c_ast.TypeDecl(None, None, None, None), coord=None
  1348. )
  1349. # ------------------------------------------------------------------
  1350. # Statements
  1351. # ------------------------------------------------------------------
  1352. # BNF: statement : labeled_statement | compound_statement
  1353. # | selection_statement | iteration_statement
  1354. # | jump_statement | expression_statement
  1355. # | static_assert | pppragma_directive
  1356. def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]:
  1357. tok_type = self._peek_type()
  1358. match tok_type:
  1359. case "CASE" | "DEFAULT":
  1360. return self._parse_labeled_statement()
  1361. case "ID" if self._peek_type(2) == "COLON":
  1362. return self._parse_labeled_statement()
  1363. case "LBRACE":
  1364. return self._parse_compound_statement()
  1365. case "IF" | "SWITCH":
  1366. return self._parse_selection_statement()
  1367. case "WHILE" | "DO" | "FOR":
  1368. return self._parse_iteration_statement()
  1369. case "GOTO" | "BREAK" | "CONTINUE" | "RETURN":
  1370. return self._parse_jump_statement()
  1371. case "PPPRAGMA" | "_PRAGMA":
  1372. return self._parse_pppragma_directive()
  1373. case "_STATIC_ASSERT":
  1374. return self._parse_static_assert()
  1375. case _:
  1376. return self._parse_expression_statement()
  1377. # BNF: pragmacomp_or_statement : pppragma_directive* statement
  1378. def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]:
  1379. if self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
  1380. pragmas = self._parse_pppragma_directive_list()
  1381. stmt = self._parse_statement()
  1382. return c_ast.Compound(block_items=pragmas + [stmt], coord=pragmas[0].coord)
  1383. return self._parse_statement()
  1384. # BNF: block_item : declaration | statement
  1385. def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]:
  1386. if self._starts_declaration():
  1387. return self._parse_declaration()
  1388. return self._parse_statement()
  1389. # BNF: block_item_list : block_item+
  1390. def _parse_block_item_list(self) -> List[c_ast.Node]:
  1391. items = []
  1392. while self._peek_type() not in {"RBRACE", None}:
  1393. item = self._parse_block_item()
  1394. if isinstance(item, list):
  1395. if item == [None]:
  1396. continue
  1397. items.extend(item)
  1398. else:
  1399. items.append(item)
  1400. return items
  1401. # BNF: compound_statement : '{' block_item_list? '}'
  1402. def _parse_compound_statement(self) -> c_ast.Node:
  1403. lbrace_tok = self._expect("LBRACE")
  1404. if self._accept("RBRACE"):
  1405. return c_ast.Compound(block_items=None, coord=self._tok_coord(lbrace_tok))
  1406. block_items = self._parse_block_item_list()
  1407. self._expect("RBRACE")
  1408. return c_ast.Compound(
  1409. block_items=block_items, coord=self._tok_coord(lbrace_tok)
  1410. )
  1411. # BNF: labeled_statement : ID ':' statement
  1412. # | CASE constant_expression ':' statement
  1413. # | DEFAULT ':' statement
  1414. def _parse_labeled_statement(self) -> c_ast.Node:
  1415. tok_type = self._peek_type()
  1416. match tok_type:
  1417. case "ID":
  1418. name_tok = self._advance()
  1419. self._expect("COLON")
  1420. if self._starts_statement():
  1421. stmt = self._parse_pragmacomp_or_statement()
  1422. else:
  1423. stmt = c_ast.EmptyStatement(self._tok_coord(name_tok))
  1424. return c_ast.Label(name_tok.value, stmt, self._tok_coord(name_tok))
  1425. case "CASE":
  1426. case_tok = self._advance()
  1427. expr = self._parse_constant_expression()
  1428. self._expect("COLON")
  1429. if self._starts_statement():
  1430. stmt = self._parse_pragmacomp_or_statement()
  1431. else:
  1432. stmt = c_ast.EmptyStatement(self._tok_coord(case_tok))
  1433. return c_ast.Case(expr, [stmt], self._tok_coord(case_tok))
  1434. case "DEFAULT":
  1435. def_tok = self._advance()
  1436. self._expect("COLON")
  1437. if self._starts_statement():
  1438. stmt = self._parse_pragmacomp_or_statement()
  1439. else:
  1440. stmt = c_ast.EmptyStatement(self._tok_coord(def_tok))
  1441. return c_ast.Default([stmt], self._tok_coord(def_tok))
  1442. case _:
  1443. self._parse_error("Invalid labeled statement", self.clex.filename)
  1444. # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)?
  1445. # | SWITCH '(' expression ')' statement
  1446. def _parse_selection_statement(self) -> c_ast.Node:
  1447. tok = self._advance()
  1448. match tok.type:
  1449. case "IF":
  1450. self._expect("LPAREN")
  1451. cond = self._parse_expression()
  1452. self._expect("RPAREN")
  1453. then_stmt = self._parse_pragmacomp_or_statement()
  1454. if self._accept("ELSE"):
  1455. else_stmt = self._parse_pragmacomp_or_statement()
  1456. return c_ast.If(cond, then_stmt, else_stmt, self._tok_coord(tok))
  1457. return c_ast.If(cond, then_stmt, None, self._tok_coord(tok))
  1458. case "SWITCH":
  1459. self._expect("LPAREN")
  1460. expr = self._parse_expression()
  1461. self._expect("RPAREN")
  1462. stmt = self._parse_pragmacomp_or_statement()
  1463. return fix_switch_cases(c_ast.Switch(expr, stmt, self._tok_coord(tok)))
  1464. case _:
  1465. self._parse_error("Invalid selection statement", self._tok_coord(tok))
  1466. # BNF: iteration_statement : WHILE '(' expression ')' statement
  1467. # | DO statement WHILE '(' expression ')' ';'
  1468. # | FOR '(' (declaration | expression_opt) ';'
  1469. # expression_opt ';' expression_opt ')' statement
  1470. def _parse_iteration_statement(self) -> c_ast.Node:
  1471. tok = self._advance()
  1472. match tok.type:
  1473. case "WHILE":
  1474. self._expect("LPAREN")
  1475. cond = self._parse_expression()
  1476. self._expect("RPAREN")
  1477. stmt = self._parse_pragmacomp_or_statement()
  1478. return c_ast.While(cond, stmt, self._tok_coord(tok))
  1479. case "DO":
  1480. stmt = self._parse_pragmacomp_or_statement()
  1481. self._expect("WHILE")
  1482. self._expect("LPAREN")
  1483. cond = self._parse_expression()
  1484. self._expect("RPAREN")
  1485. self._expect("SEMI")
  1486. return c_ast.DoWhile(cond, stmt, self._tok_coord(tok))
  1487. case "FOR":
  1488. self._expect("LPAREN")
  1489. if self._starts_declaration():
  1490. decls = self._parse_declaration()
  1491. init = c_ast.DeclList(decls, self._tok_coord(tok))
  1492. cond = self._parse_expression_opt()
  1493. self._expect("SEMI")
  1494. next_expr = self._parse_expression_opt()
  1495. self._expect("RPAREN")
  1496. stmt = self._parse_pragmacomp_or_statement()
  1497. return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
  1498. init = self._parse_expression_opt()
  1499. self._expect("SEMI")
  1500. cond = self._parse_expression_opt()
  1501. self._expect("SEMI")
  1502. next_expr = self._parse_expression_opt()
  1503. self._expect("RPAREN")
  1504. stmt = self._parse_pragmacomp_or_statement()
  1505. return c_ast.For(init, cond, next_expr, stmt, self._tok_coord(tok))
  1506. case _:
  1507. self._parse_error("Invalid iteration statement", self._tok_coord(tok))
  1508. # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';'
  1509. # | RETURN expression? ';'
  1510. def _parse_jump_statement(self) -> c_ast.Node:
  1511. tok = self._advance()
  1512. match tok.type:
  1513. case "GOTO":
  1514. name_tok = self._expect("ID")
  1515. self._expect("SEMI")
  1516. return c_ast.Goto(name_tok.value, self._tok_coord(tok))
  1517. case "BREAK":
  1518. self._expect("SEMI")
  1519. return c_ast.Break(self._tok_coord(tok))
  1520. case "CONTINUE":
  1521. self._expect("SEMI")
  1522. return c_ast.Continue(self._tok_coord(tok))
  1523. case "RETURN":
  1524. if self._accept("SEMI"):
  1525. return c_ast.Return(None, self._tok_coord(tok))
  1526. expr = self._parse_expression()
  1527. self._expect("SEMI")
  1528. return c_ast.Return(expr, self._tok_coord(tok))
  1529. case _:
  1530. self._parse_error("Invalid jump statement", self._tok_coord(tok))
  1531. # BNF: expression_statement : expression_opt ';'
  1532. def _parse_expression_statement(self) -> c_ast.Node:
  1533. expr = self._parse_expression_opt()
  1534. semi_tok = self._expect("SEMI")
  1535. if expr is None:
  1536. return c_ast.EmptyStatement(self._tok_coord(semi_tok))
  1537. return expr
  1538. # ------------------------------------------------------------------
  1539. # Expressions
  1540. # ------------------------------------------------------------------
  1541. # BNF: expression_opt : expression | empty
  1542. def _parse_expression_opt(self) -> Optional[c_ast.Node]:
  1543. if self._starts_expression():
  1544. return self._parse_expression()
  1545. return None
  1546. # BNF: expression : assignment_expression (',' assignment_expression)*
  1547. def _parse_expression(self) -> c_ast.Node:
  1548. expr = self._parse_assignment_expression()
  1549. if not self._accept("COMMA"):
  1550. return expr
  1551. exprs = [expr, self._parse_assignment_expression()]
  1552. while self._accept("COMMA"):
  1553. exprs.append(self._parse_assignment_expression())
  1554. return c_ast.ExprList(exprs, expr.coord)
  1555. # BNF: assignment_expression : conditional_expression
  1556. # | unary_expression assignment_op assignment_expression
  1557. def _parse_assignment_expression(self) -> c_ast.Node:
  1558. if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE":
  1559. self._advance()
  1560. comp = self._parse_compound_statement()
  1561. self._expect("RPAREN")
  1562. return comp
  1563. expr = self._parse_conditional_expression()
  1564. if self._is_assignment_op():
  1565. op = self._advance().value
  1566. rhs = self._parse_assignment_expression()
  1567. return c_ast.Assignment(op, expr, rhs, expr.coord)
  1568. return expr
  1569. # BNF: conditional_expression : binary_expression
  1570. # | binary_expression '?' expression ':' conditional_expression
  1571. def _parse_conditional_expression(self) -> c_ast.Node:
  1572. expr = self._parse_binary_expression()
  1573. if self._accept("CONDOP"):
  1574. iftrue = self._parse_expression()
  1575. self._expect("COLON")
  1576. iffalse = self._parse_conditional_expression()
  1577. return c_ast.TernaryOp(expr, iftrue, iffalse, expr.coord)
  1578. return expr
  1579. # BNF: binary_expression : cast_expression (binary_op cast_expression)*
  1580. def _parse_binary_expression(
  1581. self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None
  1582. ) -> c_ast.Node:
  1583. if lhs is None:
  1584. lhs = self._parse_cast_expression()
  1585. while True:
  1586. tok = self._peek()
  1587. if tok is None or tok.type not in _BINARY_PRECEDENCE:
  1588. break
  1589. prec = _BINARY_PRECEDENCE[tok.type]
  1590. if prec < min_prec:
  1591. break
  1592. op = tok.value
  1593. self._advance()
  1594. rhs = self._parse_cast_expression()
  1595. while True:
  1596. next_tok = self._peek()
  1597. if next_tok is None or next_tok.type not in _BINARY_PRECEDENCE:
  1598. break
  1599. next_prec = _BINARY_PRECEDENCE[next_tok.type]
  1600. if next_prec > prec:
  1601. rhs = self._parse_binary_expression(next_prec, rhs)
  1602. else:
  1603. break
  1604. lhs = c_ast.BinaryOp(op, lhs, rhs, lhs.coord)
  1605. return lhs
  1606. # BNF: cast_expression : '(' type_name ')' cast_expression
  1607. # | unary_expression
  1608. def _parse_cast_expression(self) -> c_ast.Node:
  1609. result = self._try_parse_paren_type_name()
  1610. if result is not None:
  1611. typ, mark, lparen_tok = result
  1612. if self._peek_type() == "LBRACE":
  1613. # (type){...} is a compound literal, not a cast. Examples:
  1614. # (int){1} -> compound literal, handled in postfix
  1615. # (int) x -> cast, handled below
  1616. self._reset(mark)
  1617. else:
  1618. expr = self._parse_cast_expression()
  1619. return c_ast.Cast(typ, expr, self._tok_coord(lparen_tok))
  1620. return self._parse_unary_expression()
  1621. # BNF: unary_expression : postfix_expression
  1622. # | '++' unary_expression
  1623. # | '--' unary_expression
  1624. # | unary_op cast_expression
  1625. # | 'sizeof' unary_expression
  1626. # | 'sizeof' '(' type_name ')'
  1627. # | '_Alignof' '(' type_name ')'
  1628. def _parse_unary_expression(self) -> c_ast.Node:
  1629. tok_type = self._peek_type()
  1630. if tok_type in {"PLUSPLUS", "MINUSMINUS"}:
  1631. tok = self._advance()
  1632. expr = self._parse_unary_expression()
  1633. return c_ast.UnaryOp(tok.value, expr, expr.coord)
  1634. if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}:
  1635. tok = self._advance()
  1636. expr = self._parse_cast_expression()
  1637. return c_ast.UnaryOp(tok.value, expr, expr.coord)
  1638. if tok_type == "SIZEOF":
  1639. tok = self._advance()
  1640. result = self._try_parse_paren_type_name()
  1641. if result is not None:
  1642. typ, _, _ = result
  1643. return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
  1644. expr = self._parse_unary_expression()
  1645. return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok))
  1646. if tok_type == "_ALIGNOF":
  1647. tok = self._advance()
  1648. self._expect("LPAREN")
  1649. typ = self._parse_type_name()
  1650. self._expect("RPAREN")
  1651. return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
  1652. return self._parse_postfix_expression()
  1653. # BNF: postfix_expression : primary_expression postfix_suffix*
  1654. # | '(' type_name ')' '{' initializer_list ','? '}'
  1655. def _parse_postfix_expression(self) -> c_ast.Node:
  1656. result = self._try_parse_paren_type_name()
  1657. if result is not None:
  1658. typ, mark, _ = result
  1659. # Disambiguate between casts and compound literals:
  1660. # (int) x -> cast
  1661. # (int) {1} -> compound literal
  1662. if self._accept("LBRACE"):
  1663. init = self._parse_initializer_list()
  1664. self._accept("COMMA")
  1665. self._expect("RBRACE")
  1666. return c_ast.CompoundLiteral(typ, init)
  1667. else:
  1668. self._reset(mark)
  1669. expr = self._parse_primary_expression()
  1670. while True:
  1671. if self._accept("LBRACKET"):
  1672. sub = self._parse_expression()
  1673. self._expect("RBRACKET")
  1674. expr = c_ast.ArrayRef(expr, sub, expr.coord)
  1675. continue
  1676. if self._accept("LPAREN"):
  1677. if self._peek_type() == "RPAREN":
  1678. self._advance()
  1679. args = None
  1680. else:
  1681. args = self._parse_argument_expression_list()
  1682. self._expect("RPAREN")
  1683. expr = c_ast.FuncCall(expr, args, expr.coord)
  1684. continue
  1685. if self._peek_type() in {"PERIOD", "ARROW"}:
  1686. op_tok = self._advance()
  1687. name_tok = self._advance()
  1688. if name_tok.type not in {"ID", "TYPEID"}:
  1689. self._parse_error(
  1690. "Invalid struct reference", self._tok_coord(name_tok)
  1691. )
  1692. field = c_ast.ID(name_tok.value, self._tok_coord(name_tok))
  1693. expr = c_ast.StructRef(expr, op_tok.value, field, expr.coord)
  1694. continue
  1695. if self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}:
  1696. tok = self._advance()
  1697. expr = c_ast.UnaryOp("p" + tok.value, expr, expr.coord)
  1698. continue
  1699. break
  1700. return expr
  1701. # BNF: primary_expression : ID | constant | string_literal
  1702. # | '(' expression ')' | offsetof
  1703. def _parse_primary_expression(self) -> c_ast.Node:
  1704. tok_type = self._peek_type()
  1705. if tok_type == "ID":
  1706. return self._parse_identifier()
  1707. if (
  1708. tok_type in _INT_CONST
  1709. or tok_type in _FLOAT_CONST
  1710. or tok_type in _CHAR_CONST
  1711. ):
  1712. return self._parse_constant()
  1713. if tok_type in _STRING_LITERAL:
  1714. return self._parse_unified_string_literal()
  1715. if tok_type in _WSTR_LITERAL:
  1716. return self._parse_unified_wstring_literal()
  1717. if tok_type == "LPAREN":
  1718. self._advance()
  1719. expr = self._parse_expression()
  1720. self._expect("RPAREN")
  1721. return expr
  1722. if tok_type == "OFFSETOF":
  1723. off_tok = self._advance()
  1724. self._expect("LPAREN")
  1725. typ = self._parse_type_name()
  1726. self._expect("COMMA")
  1727. designator = self._parse_offsetof_member_designator()
  1728. self._expect("RPAREN")
  1729. coord = self._tok_coord(off_tok)
  1730. return c_ast.FuncCall(
  1731. c_ast.ID(off_tok.value, coord),
  1732. c_ast.ExprList([typ, designator], coord),
  1733. coord,
  1734. )
  1735. self._parse_error("Invalid expression", self.clex.filename)
  1736. # BNF: offsetof_member_designator : identifier_or_typeid
  1737. # ('.' identifier_or_typeid | '[' expression ']')*
  1738. def _parse_offsetof_member_designator(self) -> c_ast.Node:
  1739. node = self._parse_identifier_or_typeid()
  1740. while True:
  1741. if self._accept("PERIOD"):
  1742. field = self._parse_identifier_or_typeid()
  1743. node = c_ast.StructRef(node, ".", field, node.coord)
  1744. continue
  1745. if self._accept("LBRACKET"):
  1746. expr = self._parse_expression()
  1747. self._expect("RBRACKET")
  1748. node = c_ast.ArrayRef(node, expr, node.coord)
  1749. continue
  1750. break
  1751. return node
  1752. # BNF: argument_expression_list : assignment_expression (',' assignment_expression)*
  1753. def _parse_argument_expression_list(self) -> c_ast.Node:
  1754. expr = self._parse_assignment_expression()
  1755. exprs = [expr]
  1756. while self._accept("COMMA"):
  1757. exprs.append(self._parse_assignment_expression())
  1758. return c_ast.ExprList(exprs, expr.coord)
  1759. # BNF: constant_expression : conditional_expression
  1760. def _parse_constant_expression(self) -> c_ast.Node:
  1761. return self._parse_conditional_expression()
  1762. # ------------------------------------------------------------------
  1763. # Terminals
  1764. # ------------------------------------------------------------------
  1765. # BNF: identifier : ID
  1766. def _parse_identifier(self) -> c_ast.Node:
  1767. tok = self._expect("ID")
  1768. return c_ast.ID(tok.value, self._tok_coord(tok))
  1769. # BNF: identifier_or_typeid : ID | TYPEID
  1770. def _parse_identifier_or_typeid(self) -> c_ast.Node:
  1771. tok = self._advance()
  1772. if tok.type not in {"ID", "TYPEID"}:
  1773. self._parse_error("Expected identifier", self._tok_coord(tok))
  1774. return c_ast.ID(tok.value, self._tok_coord(tok))
  1775. # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST
  1776. def _parse_constant(self) -> c_ast.Node:
  1777. tok = self._advance()
  1778. if tok.type in _INT_CONST:
  1779. u_count = 0
  1780. l_count = 0
  1781. for ch in tok.value[-3:]:
  1782. if ch in ("l", "L"):
  1783. l_count += 1
  1784. elif ch in ("u", "U"):
  1785. u_count += 1
  1786. if u_count > 1:
  1787. raise ValueError("Constant cannot have more than one u/U suffix.")
  1788. if l_count > 2:
  1789. raise ValueError("Constant cannot have more than two l/L suffix.")
  1790. prefix = "unsigned " * u_count + "long " * l_count
  1791. return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok))
  1792. if tok.type in _FLOAT_CONST:
  1793. if tok.value[-1] in ("f", "F"):
  1794. t = "float"
  1795. elif tok.value[-1] in ("l", "L"):
  1796. t = "long double"
  1797. else:
  1798. t = "double"
  1799. return c_ast.Constant(t, tok.value, self._tok_coord(tok))
  1800. if tok.type in _CHAR_CONST:
  1801. return c_ast.Constant("char", tok.value, self._tok_coord(tok))
  1802. self._parse_error("Invalid constant", self._tok_coord(tok))
  1803. # BNF: unified_string_literal : STRING_LITERAL+
  1804. def _parse_unified_string_literal(self) -> c_ast.Node:
  1805. tok = self._expect("STRING_LITERAL")
  1806. node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
  1807. while self._peek_type() == "STRING_LITERAL":
  1808. tok2 = self._advance()
  1809. node.value = node.value[:-1] + tok2.value[1:]
  1810. return node
  1811. # BNF: unified_wstring_literal : WSTRING_LITERAL+
  1812. def _parse_unified_wstring_literal(self) -> c_ast.Node:
  1813. tok = self._advance()
  1814. if tok.type not in _WSTR_LITERAL:
  1815. self._parse_error("Invalid string literal", self._tok_coord(tok))
  1816. node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
  1817. while self._peek_type() in _WSTR_LITERAL:
  1818. tok2 = self._advance()
  1819. node.value = node.value.rstrip()[:-1] + tok2.value[2:]
  1820. return node
  1821. # ------------------------------------------------------------------
  1822. # Initializers
  1823. # ------------------------------------------------------------------
  1824. # BNF: initializer : assignment_expression
  1825. # | '{' initializer_list ','? '}'
  1826. # | '{' '}'
  1827. def _parse_initializer(self) -> c_ast.Node:
  1828. lbrace_tok = self._accept("LBRACE")
  1829. if lbrace_tok:
  1830. if self._accept("RBRACE"):
  1831. return c_ast.InitList([], self._tok_coord(lbrace_tok))
  1832. init_list = self._parse_initializer_list()
  1833. self._accept("COMMA")
  1834. self._expect("RBRACE")
  1835. return init_list
  1836. return self._parse_assignment_expression()
  1837. # BNF: initializer_list : initializer_item (',' initializer_item)* ','?
  1838. def _parse_initializer_list(self) -> c_ast.Node:
  1839. items = [self._parse_initializer_item()]
  1840. while self._accept("COMMA"):
  1841. if self._peek_type() == "RBRACE":
  1842. break
  1843. items.append(self._parse_initializer_item())
  1844. return c_ast.InitList(items, items[0].coord)
  1845. # BNF: initializer_item : designation? initializer
  1846. def _parse_initializer_item(self) -> c_ast.Node:
  1847. designation = None
  1848. if self._peek_type() in {"LBRACKET", "PERIOD"}:
  1849. designation = self._parse_designation()
  1850. init = self._parse_initializer()
  1851. if designation is not None:
  1852. return c_ast.NamedInitializer(designation, init)
  1853. return init
  1854. # BNF: designation : designator_list '='
  1855. def _parse_designation(self) -> List[c_ast.Node]:
  1856. designators = self._parse_designator_list()
  1857. self._expect("EQUALS")
  1858. return designators
  1859. # BNF: designator_list : designator+
  1860. def _parse_designator_list(self) -> List[c_ast.Node]:
  1861. designators = []
  1862. while self._peek_type() in {"LBRACKET", "PERIOD"}:
  1863. designators.append(self._parse_designator())
  1864. return designators
  1865. # BNF: designator : '[' constant_expression ']'
  1866. # | '.' identifier_or_typeid
  1867. def _parse_designator(self) -> c_ast.Node:
  1868. if self._accept("LBRACKET"):
  1869. expr = self._parse_constant_expression()
  1870. self._expect("RBRACKET")
  1871. return expr
  1872. if self._accept("PERIOD"):
  1873. return self._parse_identifier_or_typeid()
  1874. self._parse_error("Invalid designator", self.clex.filename)
  1875. # ------------------------------------------------------------------
  1876. # Preprocessor-like directives
  1877. # ------------------------------------------------------------------
  1878. # BNF: pp_directive : '#' ... (unsupported)
  1879. def _parse_pp_directive(self) -> NoReturn:
  1880. tok = self._expect("PPHASH")
  1881. self._parse_error("Directives not supported yet", self._tok_coord(tok))
  1882. # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR?
  1883. # | _PRAGMA '(' string_literal ')'
  1884. def _parse_pppragma_directive(self) -> c_ast.Node:
  1885. if self._peek_type() == "PPPRAGMA":
  1886. tok = self._advance()
  1887. if self._peek_type() == "PPPRAGMASTR":
  1888. str_tok = self._advance()
  1889. return c_ast.Pragma(str_tok.value, self._tok_coord(str_tok))
  1890. return c_ast.Pragma("", self._tok_coord(tok))
  1891. if self._peek_type() == "_PRAGMA":
  1892. tok = self._advance()
  1893. lparen = self._expect("LPAREN")
  1894. literal = self._parse_unified_string_literal()
  1895. self._expect("RPAREN")
  1896. return c_ast.Pragma(literal, self._tok_coord(lparen))
  1897. self._parse_error("Invalid pragma", self.clex.filename)
  1898. # BNF: pppragma_directive_list : pppragma_directive+
  1899. def _parse_pppragma_directive_list(self) -> List[c_ast.Node]:
  1900. pragmas = []
  1901. while self._peek_type() in {"PPPRAGMA", "_PRAGMA"}:
  1902. pragmas.append(self._parse_pppragma_directive())
  1903. return pragmas
  1904. # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')'
  1905. def _parse_static_assert(self) -> List[c_ast.Node]:
  1906. tok = self._expect("_STATIC_ASSERT")
  1907. self._expect("LPAREN")
  1908. cond = self._parse_constant_expression()
  1909. msg = None
  1910. if self._accept("COMMA"):
  1911. msg = self._parse_unified_string_literal()
  1912. self._expect("RPAREN")
  1913. return [c_ast.StaticAssert(cond, msg, self._tok_coord(tok))]
  1914. _ASSIGNMENT_OPS = {
  1915. "EQUALS",
  1916. "XOREQUAL",
  1917. "TIMESEQUAL",
  1918. "DIVEQUAL",
  1919. "MODEQUAL",
  1920. "PLUSEQUAL",
  1921. "MINUSEQUAL",
  1922. "LSHIFTEQUAL",
  1923. "RSHIFTEQUAL",
  1924. "ANDEQUAL",
  1925. "OREQUAL",
  1926. }
  1927. # Precedence of operators (lower number = weather binding)
  1928. # If this changes, c_generator.CGenerator.precedence_map needs to change as
  1929. # well
  1930. _BINARY_PRECEDENCE = {
  1931. "LOR": 0,
  1932. "LAND": 1,
  1933. "OR": 2,
  1934. "XOR": 3,
  1935. "AND": 4,
  1936. "EQ": 5,
  1937. "NE": 5,
  1938. "GT": 6,
  1939. "GE": 6,
  1940. "LT": 6,
  1941. "LE": 6,
  1942. "RSHIFT": 7,
  1943. "LSHIFT": 7,
  1944. "PLUS": 8,
  1945. "MINUS": 8,
  1946. "TIMES": 9,
  1947. "DIVIDE": 9,
  1948. "MOD": 9,
  1949. }
  1950. _STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"}
  1951. _FUNCTION_SPEC = {"INLINE", "_NORETURN"}
  1952. _TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"}
  1953. _TYPE_SPEC_SIMPLE = {
  1954. "VOID",
  1955. "_BOOL",
  1956. "CHAR",
  1957. "SHORT",
  1958. "INT",
  1959. "LONG",
  1960. "FLOAT",
  1961. "DOUBLE",
  1962. "_COMPLEX",
  1963. "SIGNED",
  1964. "UNSIGNED",
  1965. "__INT128",
  1966. }
  1967. _DECL_START = (
  1968. _STORAGE_CLASS
  1969. | _FUNCTION_SPEC
  1970. | _TYPE_QUALIFIER
  1971. | _TYPE_SPEC_SIMPLE
  1972. | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"}
  1973. )
  1974. _EXPR_START = {
  1975. "ID",
  1976. "LPAREN",
  1977. "PLUSPLUS",
  1978. "MINUSMINUS",
  1979. "PLUS",
  1980. "MINUS",
  1981. "TIMES",
  1982. "AND",
  1983. "NOT",
  1984. "LNOT",
  1985. "SIZEOF",
  1986. "_ALIGNOF",
  1987. "OFFSETOF",
  1988. }
  1989. _INT_CONST = {
  1990. "INT_CONST_DEC",
  1991. "INT_CONST_OCT",
  1992. "INT_CONST_HEX",
  1993. "INT_CONST_BIN",
  1994. "INT_CONST_CHAR",
  1995. }
  1996. _FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"}
  1997. _CHAR_CONST = {
  1998. "CHAR_CONST",
  1999. "WCHAR_CONST",
  2000. "U8CHAR_CONST",
  2001. "U16CHAR_CONST",
  2002. "U32CHAR_CONST",
  2003. }
  2004. _STRING_LITERAL = {"STRING_LITERAL"}
  2005. _WSTR_LITERAL = {
  2006. "WSTRING_LITERAL",
  2007. "U8STRING_LITERAL",
  2008. "U16STRING_LITERAL",
  2009. "U32STRING_LITERAL",
  2010. }
  2011. _STARTS_EXPRESSION = (
  2012. _EXPR_START
  2013. | _INT_CONST
  2014. | _FLOAT_CONST
  2015. | _CHAR_CONST
  2016. | _STRING_LITERAL
  2017. | _WSTR_LITERAL
  2018. )
  2019. _STARTS_STATEMENT = {
  2020. "LBRACE",
  2021. "IF",
  2022. "SWITCH",
  2023. "WHILE",
  2024. "DO",
  2025. "FOR",
  2026. "GOTO",
  2027. "BREAK",
  2028. "CONTINUE",
  2029. "RETURN",
  2030. "CASE",
  2031. "DEFAULT",
  2032. "PPPRAGMA",
  2033. "_PRAGMA",
  2034. "_STATIC_ASSERT",
  2035. "SEMI",
  2036. }
  2037. class _TokenStream:
  2038. """Wraps a lexer to provide convenient, buffered access to the underlying
  2039. token stream. The lexer is expected to be initialized with the input
  2040. string already.
  2041. """
  2042. def __init__(self, lexer: CLexer) -> None:
  2043. self._lexer = lexer
  2044. self._buffer: List[Optional[_Token]] = []
  2045. self._index = 0
  2046. def peek(self, k: int = 1) -> Optional[_Token]:
  2047. """Peek at the k-th next token in the stream, without consuming it.
  2048. Examples:
  2049. k=1 returns the immediate next token.
  2050. k=2 returns the token after that.
  2051. """
  2052. if k <= 0:
  2053. return None
  2054. self._fill(k)
  2055. return self._buffer[self._index + k - 1]
  2056. def next(self) -> Optional[_Token]:
  2057. """Consume a single token and return it."""
  2058. self._fill(1)
  2059. tok = self._buffer[self._index]
  2060. self._index += 1
  2061. return tok
  2062. # The 'mark' and 'reset' methods are useful for speculative parsing with
  2063. # backtracking; when the parser needs to examine a sequence of tokens
  2064. # and potentially decide to try a different path on the same sequence, it
  2065. # can call 'mark' to obtain the current token position, and if the first
  2066. # path fails restore the position with `reset(pos)`.
  2067. def mark(self) -> int:
  2068. return self._index
  2069. def reset(self, mark: int) -> None:
  2070. self._index = mark
  2071. def _fill(self, n: int) -> None:
  2072. while len(self._buffer) < self._index + n:
  2073. tok = self._lexer.token()
  2074. self._buffer.append(tok)
  2075. if tok is None:
  2076. break
  2077. # Declaration specifiers are represented by a dictionary with entries:
  2078. # - qual: a list of type qualifiers
  2079. # - storage: a list of storage class specifiers
  2080. # - type: a list of type specifiers
  2081. # - function: a list of function specifiers
  2082. # - alignment: a list of alignment specifiers
  2083. class _DeclSpec(TypedDict):
  2084. qual: List[Any]
  2085. storage: List[Any]
  2086. type: List[Any]
  2087. function: List[Any]
  2088. alignment: List[Any]
  2089. _DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"]
  2090. class _DeclInfo(TypedDict):
  2091. # Declarator payloads used by declaration/initializer parsing:
  2092. # - decl: the declarator node (may be None for abstract/implicit cases)
  2093. # - init: optional initializer expression
  2094. # - bitsize: optional bit-field width expression (for struct declarators)
  2095. decl: Optional[c_ast.Node]
  2096. init: Optional[c_ast.Node]
  2097. bitsize: Optional[c_ast.Node]