| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376 |
- # ------------------------------------------------------------------------------
- # pycparser: c_parser.py
- #
- # Recursive-descent parser for the C language.
- #
- # Eli Bendersky [https://eli.thegreenplace.net/]
- # License: BSD
- # ------------------------------------------------------------------------------
- from dataclasses import dataclass
- from typing import (
- Any,
- Dict,
- List,
- Literal,
- NoReturn,
- Optional,
- Tuple,
- TypedDict,
- cast,
- )
- from . import c_ast
- from .c_lexer import CLexer, _Token
- from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
@dataclass
class Coord:
    """Source position of a syntactic element.

    Holds a file name, a line number and an optional column number.
    """

    file: str
    line: int
    column: Optional[int] = None

    def __str__(self) -> str:
        """Render as ``file:line``, followed by ``:column`` when column is truthy."""
        # A falsy column (None or 0) is omitted from the text.
        suffix = f":{self.column}" if self.column else ""
        return f"{self.file}:{self.line}{suffix}"
class ParseError(Exception):
    """Exception type raised by the parser's error-reporting helper."""
- class CParser:
- """Recursive-descent C parser.
- Usage:
- parser = CParser()
- ast = parser.parse(text, filename)
- The `lexer` parameter lets you inject a lexer class (defaults to CLexer).
- The parameters after `lexer` are accepted for backward compatibility with
- the old PLY-based parser and are otherwise unused.
- """
def __init__(
    self,
    lex_optimize: bool = True,
    lexer: type[CLexer] = CLexer,
    lextab: str = "pycparser.lextab",
    yacc_optimize: bool = True,
    yacctab: str = "pycparser.yacctab",
    yacc_debug: bool = False,
    taboutputdir: str = "",
) -> None:
    """Create the lexer, the scope stack and the token stream.

    lexer:
        Lexer class to instantiate; it is constructed with this
        parser's error, brace and type-lookup callbacks.

    All other parameters are accepted but not used by this method.
    """
    lexer_callbacks = dict(
        error_func=self._lex_error_func,
        on_lbrace_func=self._lex_on_lbrace_func,
        on_rbrace_func=self._lex_on_rbrace_func,
        type_lookup_func=self._lex_type_lookup_func,
    )
    self.clex: CLexer = lexer(**lexer_callbacks)

    # Symbol scopes, innermost last. Each scope maps a name to True when
    # the name is currently a type in that scope, and to False when the
    # name is used there but not as a type (for instance after
    # "int name;"). A name missing from a scope was not defined in that
    # scope at all.
    self._scope_stack: List[Dict[str, bool]] = [{}]
    self._tokens: _TokenStream = _TokenStream(self.clex)
def parse(
    self, text: str, filename: str = "", debug: bool = False
) -> c_ast.FileAST:
    """Parse C source text and return its AST.

    text:
        A string containing the C source code.
    filename:
        Name of the file being parsed (used in error messages).
    debug:
        Deprecated and unused; kept for backwards compatibility.

    A parse error is reported if any token is left over once the
    translation unit has been parsed.
    """
    # Start from a clean state: one empty scope and a fresh token stream
    # over the newly supplied input.
    self._scope_stack = [{}]
    self.clex.input(text, filename)
    self._tokens = _TokenStream(self.clex)

    ast = self._parse_translation_unit_or_empty()

    leftover = self._peek()
    if leftover is not None:
        self._parse_error(f"before: {leftover.value}", self._tok_coord(leftover))
    return ast
- # ------------------------------------------------------------------
- # Scope and declaration helpers
- # ------------------------------------------------------------------
def _coord(self, lineno: int, column: Optional[int] = None) -> Coord:
    """Build a Coord for the given line/column in the lexer's current file."""
    current_file = self.clex.filename
    return Coord(file=current_file, line=lineno, column=column)
def _parse_error(self, msg: str, coord: Coord | str | None) -> NoReturn:
    """Raise ParseError with a message of the form ``<coord>: <msg>``."""
    message = f"{coord}: {msg}"
    raise ParseError(message)
- def _push_scope(self) -> None:
- self._scope_stack.append(dict())
- def _pop_scope(self) -> None:
- assert len(self._scope_stack) > 1
- self._scope_stack.pop()
def _add_typedef_name(self, name: str, coord: Optional[Coord]) -> None:
    """Record *name* as a typedef name (a TYPEID) in the current scope.

    Reports a parse error at *coord* if the current scope already holds
    *name* as a non-typedef.
    """
    current = self._scope_stack[-1]
    # Defaulting to True means an undeclared name never triggers the error.
    if not current.get(name, True):
        self._parse_error(
            f"Typedef {name!r} previously declared as non-typedef in this scope",
            coord,
        )
    current[name] = True
def _add_identifier(self, name: str, coord: Optional[Coord]) -> None:
    """Record *name* as an object, function or enum member name (an ID)
    in the current scope.

    Reports a parse error at *coord* if the current scope already holds
    *name* as a typedef.
    """
    current = self._scope_stack[-1]
    # Defaulting to False means an undeclared name never triggers the error.
    if current.get(name, False):
        self._parse_error(
            f"Non-typedef {name!r} previously declared as typedef in this scope",
            coord,
        )
    current[name] = False
- def _is_type_in_scope(self, name: str) -> bool:
- """Is *name* a typedef-name in the current scope?"""
- for scope in reversed(self._scope_stack):
- # If name is an identifier in this scope it shadows typedefs in
- # higher scopes.
- in_scope = scope.get(name)
- if in_scope is not None:
- return in_scope
- return False
- def _lex_error_func(self, msg: str, line: int, column: int) -> None:
- self._parse_error(msg, self._coord(line, column))
- def _lex_on_lbrace_func(self) -> None:
- self._push_scope()
- def _lex_on_rbrace_func(self) -> None:
- self._pop_scope()
- def _lex_type_lookup_func(self, name: str) -> bool:
- """Looks up types that were previously defined with
- typedef.
- Passed to the lexer for recognizing identifiers that
- are types.
- """
- return self._is_type_in_scope(name)
- # To understand what's going on here, read sections A.8.5 and
- # A.8.6 of K&R2 very carefully.
- #
- # A C type consists of a basic type declaration, with a list
- # of modifiers. For example:
- #
- # int *c[5];
- #
- # The basic declaration here is 'int c', and the pointer and
- # the array are the modifiers.
- #
- # Basic declarations are represented by TypeDecl (from module c_ast) and the
- # modifiers are FuncDecl, PtrDecl and ArrayDecl.
- #
- # The standard states that whenever a new modifier is parsed, it should be
- # added to the end of the list of modifiers. For example:
- #
- # K&R2 A.8.6.2: Array Declarators
- #
- # In a declaration T D where D has the form
- # D1 [constant-expression-opt]
- # and the type of the identifier in the declaration T D1 is
- # "type-modifier T", the type of the
- # identifier of D is "type-modifier array of T"
- #
- # This is what this method does. The declarator it receives
- # can be a list of declarators ending with TypeDecl. It
- # tacks the modifier to the end of this list, just before
- # the TypeDecl.
- #
- # Additionally, the modifier may be a list itself. This is
- # useful for pointers, that can come as a chain from the rule
- # p_pointer. In this case, the whole modifier list is spliced
- # into the new location.
def _type_modify_decl(self, decl: Any, modifier: Any) -> c_ast.Node:
    """Attach a type modifier to a declarator and return the result.

    *modifier* may itself be a chain linked through ``.type``; the whole
    chain is spliced in just before the underlying TypeDecl.

    Note: both the declarator and the modifier may be mutated.
    """
    # Walk to the last link of the (possibly chained) modifier.
    last_modifier = modifier
    while last_modifier.type:
        last_modifier = last_modifier.type

    if isinstance(decl, c_ast.TypeDecl):
        # A bare basic type: the modifier chain simply wraps it.
        last_modifier.type = decl
        return modifier

    # Otherwise decl is a chain of modifiers. Find the link that points
    # directly at the TypeDecl and insert the new chain between them.
    last_decl = decl
    while not isinstance(last_decl.type, c_ast.TypeDecl):
        last_decl = last_decl.type

    last_modifier.type = last_decl.type
    last_decl.type = modifier
    return decl
- # Due to the order in which declarators are constructed,
- # they have to be fixed in order to look like a normal AST.
- #
- # When a declaration arrives from syntax construction, it has
- # these problems:
- # * The innermost TypeDecl has no type (because the basic
- # type is only known at the uppermost declaration level)
- # * The declaration has no variable name, since that is saved
- # in the innermost TypeDecl
- # * The typename of the declaration is a list of type
- # specifiers, and not a node. Here, basic identifier types
- # should be separated from more complex types like enums
- # and structs.
- #
- # This method fixes these problems.
def _fix_decl_name_type(
    self,
    decl: c_ast.Decl | c_ast.Typedef | c_ast.Typename,
    typename: List[Any],
) -> c_ast.Decl | c_ast.Typedef | c_ast.Typename:
    """Fill in the name and base type of a declaration, in place.

    decl:
        The declaration to fix; it is mutated and returned.
    typename:
        The list of type specifier nodes collected for the declaration.
    """
    # Descend to the innermost TypeDecl, which carries the declared name.
    innermost = decl
    while not isinstance(innermost, c_ast.TypeDecl):
        innermost = innermost.type

    decl.name = innermost.declname
    innermost.quals = decl.quals[:]

    # A specifier that is not an IdentifierType must stand alone (it is
    # illegal to declare "int enum .."); when it does, it becomes the
    # type directly.
    for spec_node in typename:
        if isinstance(spec_node, c_ast.IdentifierType):
            continue
        if len(typename) > 1:
            self._parse_error("Invalid multiple types specified", spec_node.coord)
        else:
            innermost.type = spec_node
            return decl

    if typename:
        # Only IdentifierType nodes remain: merge all their names into a
        # single IdentifierType.
        merged_names = []
        for ident in typename:
            merged_names.extend(ident.names)
        innermost.type = c_ast.IdentifierType(merged_names, coord=typename[0].coord)
        return decl

    # No type given at all: only functions may default to returning int.
    if not isinstance(decl.type, c_ast.FuncDecl):
        self._parse_error("Missing type in declaration", decl.coord)
    innermost.type = c_ast.IdentifierType(["int"], coord=decl.coord)
    return decl
- def _add_declaration_specifier(
- self,
- declspec: Optional["_DeclSpec"],
- newspec: Any,
- kind: "_DeclSpecKind",
- append: bool = False,
- ) -> "_DeclSpec":
- """See _DeclSpec for the specifier dictionary layout."""
- if declspec is None:
- spec: _DeclSpec = dict(
- qual=[], storage=[], type=[], function=[], alignment=[]
- )
- else:
- spec = declspec
- if append:
- spec[kind].append(newspec)
- else:
- spec[kind].insert(0, newspec)
- return spec
def _build_declarations(
    self,
    spec: "_DeclSpec",
    decls: List["_DeclInfo"],
    typedef_namespace: bool = False,
) -> List[c_ast.Node]:
    """Builds a list of declarations all sharing the given specifiers.

    spec:
        The shared specifier dictionary. Its 'type' list may be
        shortened here when its last entry is reinterpreted as the
        declared name.
    decls:
        One entry per declarator, with 'decl' and optional 'init' and
        'bitsize' keys. The first entry may be updated in place.
    typedef_namespace:
        If true, each declared name is added to the "typedef namespace",
        which also includes objects, functions, and enum constants.

    Returns one Typedef node per declarator when 'typedef' is among the
    storage specifiers, and one Decl node per declarator otherwise.
    """
    is_typedef = "typedef" in spec["storage"]
    declarations = []

    # Bit-fields are allowed to be unnamed.
    if decls[0].get("bitsize") is None:
        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None. This can only occur for the
        # first declarator.
        if decls[0]["decl"] is None:
            # The recovery below is only valid when the last type
            # specifier is a single name that is currently a type and at
            # least one other type specifier precedes it.
            if (
                len(spec["type"]) < 2
                or len(spec["type"][-1].names) != 1
                or not self._is_type_in_scope(spec["type"][-1].names[0])
            ):
                # Report at the first specifier that has a coord, or "?"
                # when none does.
                coord = "?"
                for t in spec["type"]:
                    if hasattr(t, "coord"):
                        coord = t.coord
                        break
                self._parse_error("Invalid declaration", coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]["decl"] = c_ast.TypeDecl(
                declname=spec["type"][-1].names[0],
                type=None,
                quals=None,
                align=spec["alignment"],
                coord=spec["type"][-1].coord,
            )
            # Remove the "new" type's name from the end of spec['type']
            del spec["type"][-1]
        # A similar problem can occur where the declaration ends up
        # looking like an abstract declarator. Give it a name if this is
        # the case.
        elif not isinstance(
            decls[0]["decl"],
            (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
        ):
            # Walk down to the innermost TypeDecl of the first declarator.
            decls_0_tail = cast(Any, decls[0]["decl"])
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec["type"][-1].names[0]
                del spec["type"][-1]

    for decl in decls:
        assert decl["decl"] is not None
        if is_typedef:
            declaration = c_ast.Typedef(
                name=None,
                quals=spec["qual"],
                storage=spec["storage"],
                type=decl["decl"],
                coord=decl["decl"].coord,
            )
        else:
            declaration = c_ast.Decl(
                name=None,
                quals=spec["qual"],
                align=spec["alignment"],
                storage=spec["storage"],
                funcspec=spec["function"],
                type=decl["decl"],
                init=decl.get("init"),
                bitsize=decl.get("bitsize"),
                coord=decl["decl"].coord,
            )

        # Enum/Struct/Union/IdentifierType declarations are used as they
        # are; everything else goes through the name and type fix-up.
        if isinstance(
            declaration.type,
            (c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType),
        ):
            fixed_decl = declaration
        else:
            fixed_decl = self._fix_decl_name_type(declaration, spec["type"])

        # Add the type name defined by typedef to a
        # symbol table (for usage in the lexer)
        if typedef_namespace:
            if is_typedef:
                self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
            else:
                self._add_identifier(fixed_decl.name, fixed_decl.coord)

        fixed_decl = fix_atomic_specifiers(
            cast(c_ast.Decl | c_ast.Typedef, fixed_decl)
        )
        declarations.append(fixed_decl)

    return declarations
def _build_function_definition(
    self,
    spec: "_DeclSpec",
    decl: c_ast.Node,
    param_decls: Optional[List[c_ast.Node]],
    body: c_ast.Node,
) -> c_ast.Node:
    """Build a FuncDef node from specifiers, declarator, parameter
    declarations and body.

    Reports a parse error when 'typedef' is among the storage
    specifiers.
    """
    if "typedef" in spec["storage"]:
        self._parse_error("Invalid typedef", decl.coord)

    # The function's own declaration is built like any other, from a
    # single declarator without initializer or bit size.
    decl_info = dict(decl=decl, init=None, bitsize=None)
    built = self._build_declarations(
        spec=spec,
        decls=[decl_info],
        typedef_namespace=True,
    )
    return c_ast.FuncDef(
        decl=built[0], param_decls=param_decls, body=body, coord=decl.coord
    )
def _select_struct_union_class(self, token: str) -> type:
    """Pick the AST class for a struct/union keyword.

    Returns c_ast.Struct when *token* is "struct" and c_ast.Union for
    any other value.
    """
    return c_ast.Struct if token == "struct" else c_ast.Union
- # ------------------------------------------------------------------
- # Token helpers
- # ------------------------------------------------------------------
def _peek(self, k: int = 1) -> Optional[_Token]:
    """Look at the k-th upcoming token (1-based) without consuming it."""
    upcoming = self._tokens.peek(k)
    return upcoming
- def _peek_type(self, k: int = 1) -> Optional[str]:
- """Return the type of the k-th next token, or None if absent (1-based)."""
- tok = self._peek(k)
- return tok.type if tok is not None else None
def _advance(self) -> _Token:
    """Consume and return the next token.

    Reports a parse error ("At end of input") when the stream has no
    more tokens.
    """
    tok = self._tokens.next()
    if tok is not None:
        return tok
    self._parse_error("At end of input", self.clex.filename)
def _accept(self, token_type: str) -> Optional[_Token]:
    """Consume the next token only when it has type *token_type*.

    Returns the consumed token on a match. Otherwise nothing is
    consumed and None is returned.
    """
    upcoming = self._peek()
    if upcoming is None or upcoming.type != token_type:
        return None
    return self._advance()
def _expect(self, token_type: str) -> _Token:
    """Consume the next token and require it to have type *token_type*.

    The token is consumed either way; on a mismatch a parse error is
    reported at that token.
    """
    tok = self._advance()
    matches = tok.type == token_type
    if not matches:
        self._parse_error(f"before: {tok.value}", self._tok_coord(tok))
    return tok
- def _mark(self) -> int:
- return self._tokens.mark()
- def _reset(self, mark: int) -> None:
- self._tokens.reset(mark)
def _tok_coord(self, tok: _Token) -> Coord:
    """Build the Coord of a token from its line number and column."""
    line, column = tok.lineno, tok.column
    return self._coord(line, column)
def _starts_declaration(self, tok: Optional[_Token] = None) -> bool:
    """Return whether *tok* (or the next token, when not given) has a
    type listed in _DECL_START. False when there is no token.
    """
    candidate = tok or self._peek()
    return candidate is not None and candidate.type in _DECL_START
def _starts_expression(self, tok: Optional[_Token] = None) -> bool:
    """Return whether *tok* (or the next token, when not given) has a
    type listed in _STARTS_EXPRESSION. False when there is no token.
    """
    candidate = tok or self._peek()
    return candidate is not None and candidate.type in _STARTS_EXPRESSION
def _starts_statement(self) -> bool:
    """Return whether the next token can begin a statement.

    That is the case when its type is listed in _STARTS_STATEMENT or
    when it can begin an expression. False at end of input.
    """
    kind = self._peek_type()
    if kind is None:
        return False
    return kind in _STARTS_STATEMENT or self._starts_expression()
- def _starts_declarator(self, id_only: bool = False) -> bool:
- tok_type = self._peek_type()
- if tok_type is None:
- return False
- if tok_type in {"TIMES", "LPAREN"}:
- return True
- if id_only:
- return tok_type == "ID"
- return tok_type in {"ID", "TYPEID"}
- def _peek_declarator_name_info(self) -> Tuple[Optional[str], bool]:
- mark = self._mark()
- tok_type, saw_paren = self._scan_declarator_name_info()
- self._reset(mark)
- return tok_type, saw_paren
def _parse_any_declarator(
    self, allow_abstract: bool = False, typeid_paren_as_abstract: bool = False
) -> Tuple[Optional[c_ast.Node], bool]:
    """Parse a declarator, choosing the flavour by looking ahead.

    allow_abstract:
        Whether an abstract (nameless) declarator is acceptable; when it
        is not and one is found, a parse error is reported.
    typeid_paren_as_abstract:
        Whether a TYPEID name seen inside parentheses makes the
        declarator abstract.

    Returns (decl, is_named), where is_named is False for the abstract
    case.
    """
    # C declarators are ambiguous without lookahead. For example:
    #   int foo(int (aa));   -> aa is a name (ID)
    #   typedef char TT;
    #   int bar(int (TT));   -> TT is a type (TYPEID) in parens
    name_type, saw_paren = self._peek_declarator_name_info()

    is_abstract = name_type is None or (
        typeid_paren_as_abstract and name_type == "TYPEID" and saw_paren
    )
    if is_abstract:
        if not allow_abstract:
            tok = self._peek()
            coord = self._tok_coord(tok) if tok is not None else self.clex.filename
            self._parse_error("Invalid declarator", coord)
        return self._parse_abstract_declarator_opt(), False

    if name_type != "TYPEID":
        decl = self._parse_id_declarator()
    elif typeid_paren_as_abstract:
        decl = self._parse_typeid_noparen_declarator()
    else:
        decl = self._parse_typeid_declarator()
    return decl, True
def _scan_declarator_name_info(self) -> Tuple[Optional[str], bool]:
    """Scan over a declarator to find the kind of name it declares.

    Tokens are consumed while scanning and the position is not restored
    here.

    Returns (name_type, saw_paren): name_type is "ID" or "TYPEID" for
    the name token that was found, or None when no name was found;
    saw_paren tells whether an LPAREN was entered on the way.
    """
    saw_paren = False
    # Skip any leading '*' tokens, each with its trailing type qualifiers.
    while self._accept("TIMES"):
        while self._peek_type() in _TYPE_QUALIFIER:
            self._advance()

    tok = self._peek()
    if tok is None:
        return None, saw_paren
    if tok.type in {"ID", "TYPEID"}:
        self._advance()
        return tok.type, saw_paren
    if tok.type == "LPAREN":
        saw_paren = True
        self._advance()
        # Look for the name inside the parentheses first.
        tok_type, nested_paren = self._scan_declarator_name_info()
        if nested_paren:
            saw_paren = True
        # Then skip ahead to the RPAREN matching the LPAREN consumed
        # above, tracking nesting depth along the way.
        depth = 1
        while True:
            tok = self._peek()
            if tok is None:
                # Input ended before the parentheses were closed.
                return None, saw_paren
            if tok.type == "LPAREN":
                depth += 1
            elif tok.type == "RPAREN":
                depth -= 1
                self._advance()
                if depth == 0:
                    break
                continue
            self._advance()
        return tok_type, saw_paren
    return None, saw_paren
- def _starts_direct_abstract_declarator(self) -> bool:
- return self._peek_type() in {"LPAREN", "LBRACKET"}
def _is_assignment_op(self) -> bool:
    """Return whether the next token's type is listed in _ASSIGNMENT_OPS."""
    upcoming = self._peek()
    if upcoming is None:
        return False
    return upcoming.type in _ASSIGNMENT_OPS
def _try_parse_paren_type_name(
    self,
) -> Optional[Tuple[c_ast.Typename, int, _Token]]:
    """Parse a parenthesized type name if one comes next.

    Returns (typ, mark, lparen_tok) when the upcoming tokens look like
    '(' type_name ')': typ is the parsed type name, mark is the
    token-stream position from before parsing, and lparen_tok is the
    LPAREN token. Returns None otherwise; if the LPAREN had already
    been consumed by then, the stream is rewound to mark.
    """
    start = self._mark()
    lparen_tok = self._accept("LPAREN")
    if lparen_tok is None:
        return None

    if self._starts_declaration():
        typ = self._parse_type_name()
        if self._accept("RPAREN") is not None:
            return typ, start, lparen_tok

    # Not a parenthesized type name after all: undo what was consumed.
    self._reset(start)
    return None
- # ------------------------------------------------------------------
- # Top-level
- # ------------------------------------------------------------------
- # BNF: translation_unit_or_empty : translation_unit | empty
def _parse_translation_unit_or_empty(self) -> c_ast.FileAST:
    """Parse the whole input into a FileAST; empty input yields a
    FileAST with no children.
    """
    externals = [] if self._peek() is None else self._parse_translation_unit()
    return c_ast.FileAST(externals)
- # BNF: translation_unit : external_declaration+
def _parse_translation_unit(self) -> List[c_ast.Node]:
    """Parse external declarations until the input is exhausted and
    return all resulting nodes in a single list.
    """
    nodes = []
    while self._peek() is not None:
        nodes += self._parse_external_declaration()
    return nodes
- # BNF: external_declaration : function_definition
- # | declaration
- # | pp_directive
- # | pppragma_directive
- # | static_assert
- # | ';'
def _parse_external_declaration(self) -> List[c_ast.Node]:
    """Parse one external declaration and return the nodes it produced.

    Returns an empty list at end of input, for a preprocessor directive
    and for a lone ';'. A function definition is returned as a
    single-element list.
    """
    tok = self._peek()
    if tok is None:
        return []
    if tok.type == "PPHASH":
        self._parse_pp_directive()
        return []
    if tok.type in {"PPPRAGMA", "_PRAGMA"}:
        return [self._parse_pppragma_directive()]
    if self._accept("SEMI"):
        return []
    if tok.type == "_STATIC_ASSERT":
        return self._parse_static_assert()

    if not self._starts_declaration(tok):
        # Special handling for old-style function definitions that have an
        # implicit return type, e.g.
        #
        #   foo() {
        #       return 5;
        #   }
        #
        # These get an implicit 'int' return type.
        decl = self._parse_id_declarator()
        param_decls = None
        if self._peek_type() != "LBRACE":
            self._parse_error("Invalid function definition", decl.coord)
        spec: _DeclSpec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(["int"], coord=decl.coord)],
            function=[],
        )
        func = self._build_function_definition(
            spec=spec,
            decl=decl,
            param_decls=param_decls,
            body=self._parse_compound_statement(),
        )
        return [func]

    # From here on, parsing a standard declaration/definition.
    spec, saw_type, spec_coord = self._parse_declaration_specifiers(
        allow_no_type=True
    )

    # Unless the declarator names a plain ID, hand over to the generic
    # declaration body parser.
    name_type, _ = self._peek_declarator_name_info()
    if name_type != "ID":
        decls = self._parse_decl_body_with_spec(spec, saw_type)
        self._expect("SEMI")
        return decls

    decl = self._parse_id_declarator()

    # A '{' or the start of a declaration right after the declarator is
    # handled as a function definition.
    if self._peek_type() == "LBRACE" or self._starts_declaration():
        param_decls = None
        if self._starts_declaration():
            param_decls = self._parse_declaration_list()
        if self._peek_type() != "LBRACE":
            self._parse_error("Invalid function definition", decl.coord)
        # With no type specifier given, the type defaults to 'int'.
        if not spec["type"]:
            spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]
        func = self._build_function_definition(
            spec=spec,
            decl=decl,
            param_decls=param_decls,
            body=self._parse_compound_statement(),
        )
        return [func]

    # Otherwise this is an ordinary declaration whose first declarator
    # has already been parsed.
    decl_dict: "_DeclInfo" = dict(decl=decl, init=None, bitsize=None)
    if self._accept("EQUALS"):
        decl_dict["init"] = self._parse_initializer()
    decls = self._parse_init_declarator_list(first=decl_dict)
    decls = self._build_declarations(spec=spec, decls=decls, typedef_namespace=True)
    self._expect("SEMI")
    return decls
- # ------------------------------------------------------------------
- # Declarations
- #
- # Declarations always come as lists (because they can be several in one
- # line). When returning parsed declarations, a list is always returned -
- # even if it contains a single element.
- # ------------------------------------------------------------------
def _parse_declaration(self) -> List[c_ast.Node]:
    """Parse a declaration body followed by ';' and return its nodes."""
    parsed = self._parse_decl_body()
    self._expect("SEMI")
    return parsed
- # BNF: decl_body : declaration_specifiers decl_body_with_spec
def _parse_decl_body(self) -> List[c_ast.Node]:
    """Parse declaration specifiers (a missing type is allowed) and
    then the rest of the declaration body.
    """
    parsed_spec = self._parse_declaration_specifiers(allow_no_type=True)
    spec, saw_type, _ = parsed_spec
    return self._parse_decl_body_with_spec(spec, saw_type)
- # BNF: decl_body_with_spec : init_declarator_list
- # | struct_or_union_or_enum_only
def _parse_decl_body_with_spec(
    self, spec: "_DeclSpec", saw_type: bool
) -> List[c_ast.Node]:
    """Parse what follows already-parsed declaration specifiers.

    spec:
        The specifier dictionary parsed so far.
    saw_type:
        Whether a type specifier was consumed. When it was not, only
        declarators starting with an ID (or '*' / '(') are tried.

    Returns the list of declarations that were built.
    """
    decls = None
    if saw_type:
        if self._starts_declarator():
            decls = self._parse_init_declarator_list()
    elif self._starts_declarator(id_only=True):
        decls = self._parse_init_declarator_list(id_only=True)

    if decls is not None:
        return self._build_declarations(
            spec=spec, decls=decls, typedef_namespace=True
        )

    # No declarator list follows the specifiers.
    ty = spec["type"]
    if len(ty) == 1 and isinstance(ty[0], (c_ast.Struct, c_ast.Union, c_ast.Enum)):
        # A lone struct/union/enum specifier becomes an unnamed Decl.
        sole_type = ty[0]
        return [
            c_ast.Decl(
                name=None,
                quals=spec["qual"],
                align=spec["alignment"],
                storage=spec["storage"],
                funcspec=spec["function"],
                type=sole_type,
                init=None,
                bitsize=None,
                coord=sole_type.coord,
            )
        ]

    return self._build_declarations(
        spec=spec,
        decls=[dict(decl=None, init=None, bitsize=None)],
        typedef_namespace=True,
    )
- # BNF: declaration_list : declaration+
def _parse_declaration_list(self) -> List[c_ast.Node]:
    """Parse declarations for as long as the next token starts one and
    return all resulting nodes in a single list.
    """
    collected = []
    while self._starts_declaration():
        collected += self._parse_declaration()
    return collected
- # BNF: declaration_specifiers : (storage_class_specifier
- # | type_specifier
- # | type_qualifier
- # | function_specifier
- # | alignment_specifier)+
def _parse_declaration_specifiers(
    self, allow_no_type: bool = False
) -> Tuple["_DeclSpec", bool, Optional[Coord]]:
    """Parse a sequence of declaration specifiers.

    allow_no_type:
        If True, a missing type specifier is not reported as an error.

    Returns:
        (spec, saw_type, first_coord): spec is the accumulated dict with
        qual/storage/type/function/alignment entries, saw_type tells
        whether a type specifier was consumed, and first_coord is the
        coord of the first specifier token (used for diagnostics).
    """
    spec = None
    saw_type = False
    first_coord = None

    def remember_start(token):
        # Record the coord of the very first specifier token only.
        nonlocal first_coord
        if first_coord is None:
            first_coord = self._tok_coord(token)

    def add(value, category):
        nonlocal spec
        spec = self._add_declaration_specifier(spec, value, category, append=True)

    while True:
        tok = self._peek()
        if tok is None:
            break
        kind = tok.type

        if kind == "_ALIGNAS":
            remember_start(tok)
            add(self._parse_alignment_specifier(), "alignment")
        elif kind == "_ATOMIC" and self._peek_type(2) == "LPAREN":
            # _Atomic(type-name) is a type specifier; a bare _Atomic is
            # left to the qualifier branch below.
            remember_start(tok)
            add(self._parse_atomic_specifier(), "type")
            saw_type = True
        elif kind in _TYPE_QUALIFIER:
            remember_start(tok)
            add(self._advance().value, "qual")
        elif kind in _STORAGE_CLASS:
            remember_start(tok)
            add(self._advance().value, "storage")
        elif kind in _FUNCTION_SPEC:
            remember_start(tok)
            add(self._advance().value, "function")
        elif kind in _TYPE_SPEC_SIMPLE or (kind == "TYPEID" and not saw_type):
            # A TYPEID seen after a type specifier is not consumed here;
            # it falls through to the final ``break``.
            remember_start(tok)
            name_tok = self._advance()
            add(
                c_ast.IdentifierType(
                    [name_tok.value], coord=self._tok_coord(name_tok)
                ),
                "type",
            )
            saw_type = True
        elif kind in {"STRUCT", "UNION"}:
            remember_start(tok)
            add(self._parse_struct_or_union_specifier(), "type")
            saw_type = True
        elif kind == "ENUM":
            remember_start(tok)
            add(self._parse_enum_specifier(), "type")
            saw_type = True
        else:
            break

    if spec is None:
        self._parse_error("Invalid declaration", self.clex.filename)

    if not (saw_type or allow_no_type):
        self._parse_error("Missing type in declaration", first_coord)

    return spec, saw_type, first_coord
- # BNF: specifier_qualifier_list : (type_specifier
- # | type_qualifier
- # | alignment_specifier)+
def _parse_specifier_qualifier_list(self) -> "_DeclSpec":
    """Parse type specifiers, qualifiers and alignment specifiers.

    Returns the accumulated specifier dict; its "storage" and "function"
    entries are set to empty lists when absent. An error is reported when
    nothing was consumed, or when neither a type nor an alignment
    specifier was seen.
    """
    spec = None
    saw_type = False
    saw_alignment = False
    first_coord = None

    def remember_start(token):
        # Record the coord of the very first specifier token only.
        nonlocal first_coord
        if first_coord is None:
            first_coord = self._tok_coord(token)

    def add(value, category):
        nonlocal spec
        spec = self._add_declaration_specifier(spec, value, category, append=True)

    while True:
        tok = self._peek()
        if tok is None:
            break
        kind = tok.type

        if kind == "_ALIGNAS":
            remember_start(tok)
            add(self._parse_alignment_specifier(), "alignment")
            saw_alignment = True
        elif kind == "_ATOMIC" and self._peek_type(2) == "LPAREN":
            remember_start(tok)
            add(self._parse_atomic_specifier(), "type")
            saw_type = True
        elif kind in _TYPE_QUALIFIER:
            remember_start(tok)
            add(self._advance().value, "qual")
        elif kind in _TYPE_SPEC_SIMPLE or (kind == "TYPEID" and not saw_type):
            # A TYPEID seen after a type specifier is not consumed here;
            # it falls through to the final ``break``.
            remember_start(tok)
            name_tok = self._advance()
            add(
                c_ast.IdentifierType(
                    [name_tok.value], coord=self._tok_coord(name_tok)
                ),
                "type",
            )
            saw_type = True
        elif kind in {"STRUCT", "UNION"}:
            remember_start(tok)
            add(self._parse_struct_or_union_specifier(), "type")
            saw_type = True
        elif kind == "ENUM":
            remember_start(tok)
            add(self._parse_enum_specifier(), "type")
            saw_type = True
        else:
            break

    if spec is None:
        self._parse_error("Invalid specifier list", self.clex.filename)

    if not (saw_type or saw_alignment):
        self._parse_error("Missing type in declaration", first_coord)

    # These two categories are never parsed here; give them empty lists.
    for category in ("storage", "function"):
        if spec.get(category) is None:
            spec[category] = []

    return spec
- # BNF: type_qualifier_list : type_qualifier+
def _parse_type_qualifier_list(self) -> List[str]:
    """Consume consecutive type-qualifier tokens and return their values.

    Returns an empty list when the next token is not a type qualifier.
    """
    qualifiers: List[str] = []
    while True:
        if self._peek_type() not in _TYPE_QUALIFIER:
            return qualifiers
        qualifiers.append(self._advance().value)
- # BNF: alignment_specifier : _ALIGNAS '(' (type_name | constant_expression) ')'
def _parse_alignment_specifier(self) -> c_ast.Node:
    """Parse ``_Alignas(...)`` and return a ``c_ast.Alignas`` node.

    The parenthesised operand is parsed as a type name when it starts a
    declaration, otherwise as a constant expression.
    """
    alignas_tok = self._expect("_ALIGNAS")
    self._expect("LPAREN")
    if self._starts_declaration():
        operand = self._parse_type_name()
    else:
        operand = self._parse_constant_expression()
    self._expect("RPAREN")
    return c_ast.Alignas(operand, self._tok_coord(alignas_tok))
- # BNF: atomic_specifier : _ATOMIC '(' type_name ')'
def _parse_atomic_specifier(self) -> c_ast.Node:
    """Parse ``_Atomic(type-name)``.

    Returns the parsed type name with "_Atomic" appended to its quals.
    """
    self._expect("_ATOMIC")
    self._expect("LPAREN")
    atomic_type = self._parse_type_name()
    self._expect("RPAREN")

    atomic_type.quals.append("_Atomic")
    return atomic_type
- # BNF: init_declarator_list : init_declarator (',' init_declarator)*
def _parse_init_declarator_list(
    self, first: Optional["_DeclInfo"] = None, id_only: bool = False
) -> List["_DeclInfo"]:
    """Parse a comma-separated list of init-declarators.

    first:
        An already parsed first entry; when None the first entry is
        parsed here.
    id_only:
        Forwarded to ``_parse_init_declarator`` for every parsed entry.
    """
    if first is None:
        first = self._parse_init_declarator(id_only=id_only)

    collected = [first]
    while self._accept("COMMA"):
        collected.append(self._parse_init_declarator(id_only=id_only))
    return collected
- # BNF: init_declarator : declarator ('=' initializer)?
def _parse_init_declarator(self, id_only: bool = False) -> "_DeclInfo":
    """Parse a declarator with an optional ``= initializer``.

    Returns a dict with "decl", "init" and "bitsize" (always None) keys.
    """
    if id_only:
        declarator = self._parse_id_declarator()
    else:
        declarator = self._parse_declarator()

    initializer = self._parse_initializer() if self._accept("EQUALS") else None
    return {"decl": declarator, "init": initializer, "bitsize": None}
- # ------------------------------------------------------------------
- # Structs/unions/enums
- # ------------------------------------------------------------------
- # BNF: struct_or_union_specifier : struct_or_union ID? '{' struct_declaration_list? '}'
- # | struct_or_union ID
def _parse_struct_or_union_specifier(self) -> c_ast.Node:
    """Parse a struct/union specifier, with or without a tag and a body.

    The node's coord is that of the tag token when a tag is present,
    otherwise that of the opening brace. ``decls`` is None when there is
    no body and an empty list for an empty ``{}`` body.
    """
    keyword_tok = self._advance()
    node_cls = self._select_struct_union_class(keyword_tok.value)

    def braced_members():
        # Called right after the opening '{' has been consumed.
        if self._accept("RBRACE"):
            return []
        members = self._parse_struct_declaration_list()
        self._expect("RBRACE")
        return members

    if self._peek_type() in {"ID", "TYPEID"}:
        tag_tok = self._advance()
        members = None
        if self._peek_type() == "LBRACE":
            self._advance()
            members = braced_members()
        return node_cls(
            name=tag_tok.value, decls=members, coord=self._tok_coord(tag_tok)
        )

    if self._peek_type() == "LBRACE":
        open_tok = self._advance()
        return node_cls(
            name=None, decls=braced_members(), coord=self._tok_coord(open_tok)
        )

    self._parse_error(
        "Invalid struct/union declaration", self._tok_coord(keyword_tok)
    )
- # BNF: struct_declaration_list : struct_declaration+
def _parse_struct_declaration_list(self) -> List[c_ast.Node]:
    """Parse struct declarations until '}' or end of input.

    Entries for which ``_parse_struct_declaration`` returns None
    contribute nothing to the result.
    """
    members: List[c_ast.Node] = []
    while self._peek_type() not in {None, "RBRACE"}:
        parsed = self._parse_struct_declaration()
        if parsed is not None:
            members.extend(parsed)
    return members
- # BNF: struct_declaration : specifier_qualifier_list struct_declarator_list? ';'
- # | static_assert
- # | pppragma_directive
def _parse_struct_declaration(self) -> Optional[List[c_ast.Node]]:
    """Parse one member declaration inside a struct/union body.

    Returns None for a stray ';', a one-element list for a pragma
    directive, and otherwise the declarations built from the specifier
    list and the (possibly absent) declarator list.
    """
    upcoming = self._peek_type()
    if upcoming == "SEMI":
        self._advance()
        return None
    if upcoming in {"PPPRAGMA", "_PRAGMA"}:
        return [self._parse_pppragma_directive()]

    spec = self._parse_specifier_qualifier_list()
    assert "typedef" not in spec.get("storage", [])

    declarators = None
    if self._starts_declarator() or self._peek_type() == "COLON":
        declarators = self._parse_struct_declarator_list()

    if declarators is None:
        # No declarator: with a single type specifier, that specifier
        # itself stands in as the declarator (wrapped in IdentifierType
        # when it is not already a node); otherwise there is none.
        type_specs = spec["type"]
        if len(type_specs) == 1:
            sole = type_specs[0]
            if isinstance(sole, c_ast.Node):
                placeholder = sole
            else:
                placeholder = c_ast.IdentifierType(sole)
        else:
            placeholder = None
        declarators = [{"decl": placeholder, "init": None, "bitsize": None}]

    self._expect("SEMI")
    return self._build_declarations(spec=spec, decls=declarators)
- # BNF: struct_declarator_list : struct_declarator (',' struct_declarator)*
def _parse_struct_declarator_list(self) -> List["_DeclInfo"]:
    """Parse one or more comma-separated struct declarators."""
    first = self._parse_struct_declarator()
    items = [first]
    while self._accept("COMMA"):
        items.append(self._parse_struct_declarator())
    return items
- # BNF: struct_declarator : declarator? ':' constant_expression
- # | declarator (':' constant_expression)?
def _parse_struct_declarator(self) -> "_DeclInfo":
    """Parse a struct member declarator, optionally with a bit-field width.

    A leading ':' denotes an unnamed bit-field, for which an empty
    ``TypeDecl`` is used as the declarator.
    """
    if self._accept("COLON"):
        width = self._parse_constant_expression()
        return dict(
            decl=c_ast.TypeDecl(None, None, None, None),
            init=None,
            bitsize=width,
        )

    declarator = self._parse_declarator()
    width = self._parse_constant_expression() if self._accept("COLON") else None
    return dict(decl=declarator, init=None, bitsize=width)
- # BNF: enum_specifier : ENUM ID? '{' enumerator_list? '}'
- # | ENUM ID
def _parse_enum_specifier(self) -> c_ast.Node:
    """Parse an enum specifier and return a ``c_ast.Enum`` node.

    A tag without a body yields an Enum with ``values=None``; without a
    tag a braced enumerator list is required. The node's coord is that
    of the ``enum`` keyword.
    """
    enum_tok = self._expect("ENUM")

    tag = None
    if self._peek_type() in {"ID", "TYPEID"}:
        tag = self._advance().value
        if self._peek_type() != "LBRACE":
            return c_ast.Enum(tag, None, self._tok_coord(enum_tok))

    self._expect("LBRACE")
    values = self._parse_enumerator_list()
    self._expect("RBRACE")
    return c_ast.Enum(tag, values, self._tok_coord(enum_tok))
- # BNF: enumerator_list : enumerator (',' enumerator)* ','?
def _parse_enumerator_list(self) -> c_ast.Node:
    """Parse comma-separated enumerators; a trailing comma is allowed.

    Returns an ``EnumeratorList`` whose coord is the first enumerator's.
    """
    head = self._parse_enumerator()
    result = c_ast.EnumeratorList([head], head.coord)
    while self._accept("COMMA"):
        if self._peek_type() == "RBRACE":
            # The comma just consumed was a trailing one.
            break
        result.enumerators.append(self._parse_enumerator())
    return result
- # BNF: enumerator : ID ('=' constant_expression)?
def _parse_enumerator(self) -> c_ast.Node:
    """Parse ``ID`` or ``ID = constant_expression``.

    The enumerator's name is registered through ``_add_identifier``
    before the node is returned.
    """
    ident_tok = self._expect("ID")
    init_value = (
        self._parse_constant_expression() if self._accept("EQUALS") else None
    )
    enumerator = c_ast.Enumerator(
        ident_tok.value, init_value, self._tok_coord(ident_tok)
    )
    self._add_identifier(enumerator.name, enumerator.coord)
    return enumerator
- # ------------------------------------------------------------------
- # Declarators
- # ------------------------------------------------------------------
- # BNF: declarator : pointer? direct_declarator
def _parse_declarator(self) -> c_ast.Node:
    """Parse a declarator; abstract declarators are not allowed here."""
    parsed, _is_named = self._parse_any_declarator(
        allow_abstract=False, typeid_paren_as_abstract=False
    )
    assert parsed is not None
    return parsed
- # BNF: id_declarator : declarator with ID name
def _parse_id_declarator(self) -> c_ast.Node:
    """Parse a declarator of kind "id"; a parenthesised form is allowed."""
    declarator = self._parse_declarator_kind(kind="id", allow_paren=True)
    return declarator
- # BNF: typeid_declarator : declarator with TYPEID name
def _parse_typeid_declarator(self) -> c_ast.Node:
    """Parse a declarator of kind "typeid"; a parenthesised form is allowed."""
    declarator = self._parse_declarator_kind(kind="typeid", allow_paren=True)
    return declarator
- # BNF: typeid_noparen_declarator : declarator without parenthesized name
def _parse_typeid_noparen_declarator(self) -> c_ast.Node:
    """Parse a declarator of kind "typeid" with parentheses disallowed."""
    declarator = self._parse_declarator_kind(kind="typeid", allow_paren=False)
    return declarator
- # BNF: declarator_kind : pointer? direct_declarator(kind)
def _parse_declarator_kind(self, kind: str, allow_paren: bool) -> c_ast.Node:
    """Parse an optional pointer prefix followed by a direct declarator.

    kind:
        Forwarded to ``_parse_direct_declarator`` ("id" or "typeid").
    allow_paren:
        Whether the direct declarator may be parenthesised.
    """
    pointer = self._parse_pointer() if self._peek_type() == "TIMES" else None
    base = self._parse_direct_declarator(kind, allow_paren=allow_paren)
    if pointer is None:
        return base
    return self._type_modify_decl(base, pointer)
- # BNF: direct_declarator : ID | TYPEID | '(' declarator ')'
- # | direct_declarator '[' ... ']'
- # | direct_declarator '(' ... ')'
def _parse_direct_declarator(
    self, kind: str, allow_paren: bool = True
) -> c_ast.Node:
    """Parse a direct declarator and any array/function suffixes.

    kind:
        "id" expects an ID name token; any other value expects TYPEID.
    allow_paren:
        When True, a leading '(' introduces a nested declarator.
    """
    if allow_paren and self._accept("LPAREN"):
        inner = self._parse_declarator_kind(kind, allow_paren=True)
        self._expect("RPAREN")
        return self._parse_decl_suffixes(inner)

    ident_tok = self._expect("ID" if kind == "id" else "TYPEID")
    named = c_ast.TypeDecl(
        declname=ident_tok.value,
        type=None,
        quals=None,
        align=None,
        coord=self._tok_coord(ident_tok),
    )
    return self._parse_decl_suffixes(named)
def _parse_decl_suffixes(self, decl: c_ast.Node) -> c_ast.Node:
    """Attach every following '[...]' or '(...)' suffix to ``decl``.

    Returns the declarator after all suffixes have been applied through
    ``_type_modify_decl``.
    """
    while True:
        upcoming = self._peek_type()
        if upcoming == "LBRACKET":
            suffix = self._parse_array_decl(decl)
        elif upcoming == "LPAREN":
            suffix = self._parse_function_decl(decl)
        else:
            return decl
        decl = self._type_modify_decl(decl, suffix)
- # BNF: array_decl : '[' array_specifiers? assignment_expression? ']'
def _parse_array_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
    """Parse an array suffix; the result carries ``base_decl``'s coord."""
    array_decl = self._parse_array_decl_common(
        base_type=None, coord=base_decl.coord
    )
    return array_decl
def _parse_array_decl_common(
    self, base_type: Optional[c_ast.Node], coord: Optional[Coord] = None
) -> c_ast.Node:
    """Parse an array declarator suffix and return an ArrayDecl node.

    base_type:
        Base declarator node to attach (None for direct-declarator parsing,
        TypeDecl for abstract declarators).
    coord:
        Coordinate to use for the ArrayDecl. If None, uses the '[' token.

    Accepted forms between the brackets:
        static qualifiers? expr
        qualifiers static expr
        qualifiers? *
        qualifiers? expr?
    """
    lbrack_tok = self._expect("LBRACKET")
    if coord is None:
        coord = self._tok_coord(lbrack_tok)

    def make_array_decl(dim, dim_quals):
        return c_ast.ArrayDecl(
            type=base_type, dim=dim, dim_quals=dim_quals, coord=coord
        )

    def finish_with_required_size(dim_quals):
        # After 'static' a size expression is mandatory.
        dim = self._parse_assignment_expression()
        self._expect("RBRACKET")
        return make_array_decl(dim, dim_quals)

    def finish_with_optional_size(dim_quals):
        # '*' is the unspecified-size marker only when ']' follows it
        # directly. Otherwise it begins an ordinary size expression such
        # as '*p', which must not be swallowed here.
        if self._peek_type() == "TIMES" and self._peek_type(2) == "RBRACKET":
            times_tok = self._advance()
            self._expect("RBRACKET")
            dim = c_ast.ID(times_tok.value, self._tok_coord(times_tok))
            return make_array_decl(dim, dim_quals)

        dim = None
        if self._starts_expression():
            dim = self._parse_assignment_expression()
        self._expect("RBRACKET")
        return make_array_decl(dim, dim_quals)

    if self._accept("STATIC"):
        dim_quals = ["static"] + (self._parse_type_qualifier_list() or [])
        return finish_with_required_size(dim_quals)

    if self._peek_type() in _TYPE_QUALIFIER:
        dim_quals = self._parse_type_qualifier_list() or []
        if self._accept("STATIC"):
            return finish_with_required_size(dim_quals + ["static"])
        return finish_with_optional_size(dim_quals)

    return finish_with_optional_size([])
- # BNF: function_decl : '(' (parameter_type_list_opt | identifier_list_opt) ')'
def _parse_function_decl(self, base_decl: c_ast.Node) -> c_ast.Node:
    """Parse a parenthesised parameter suffix into a ``FuncDecl``.

    An empty ``()`` yields ``args=None``. When a '{' follows the
    suffix, the named parameters (up to an ellipsis, if any) are
    registered through ``_add_identifier``.
    """
    self._expect("LPAREN")
    if self._accept("RPAREN"):
        params = None
    else:
        if self._starts_declaration():
            params = self._parse_parameter_type_list()
        else:
            params = self._parse_identifier_list_opt()
        self._expect("RPAREN")

    func_decl = c_ast.FuncDecl(args=params, type=None, coord=base_decl.coord)

    if self._peek_type() == "LBRACE" and func_decl.args is not None:
        for param in func_decl.args.params:
            if isinstance(param, c_ast.EllipsisParam):
                break
            param_name = getattr(param, "name", None)
            if param_name:
                self._add_identifier(param_name, param.coord)

    return func_decl
- # BNF: pointer : '*' type_qualifier_list? pointer?
def _parse_pointer(self) -> Optional[c_ast.Node]:
    """Parse a run of ``'*' qualifiers?`` into nested ``PtrDecl`` nodes.

    The first '*' becomes the innermost node (``type=None``) and each
    following '*' wraps the chain built so far. Returns None when the
    next token is not '*'.
    """
    chain = None
    while star_tok := self._accept("TIMES"):
        qualifiers = self._parse_type_qualifier_list() or []
        chain = c_ast.PtrDecl(
            quals=qualifiers, type=chain, coord=self._tok_coord(star_tok)
        )
    return chain
- # BNF: parameter_type_list : parameter_list (',' ELLIPSIS)?
def _parse_parameter_type_list(self) -> c_ast.ParamList:
    """Parse a parameter list with an optional trailing ``, ...``.

    When present, the ellipsis is appended as an ``EllipsisParam``.
    """
    param_list = self._parse_parameter_list()
    if self._peek_type() != "COMMA" or self._peek_type(2) != "ELLIPSIS":
        return param_list

    self._advance()  # the ',' before the ellipsis
    ellipsis_tok = self._advance()
    param_list.params.append(c_ast.EllipsisParam(self._tok_coord(ellipsis_tok)))
    return param_list
- # BNF: parameter_list : parameter_declaration (',' parameter_declaration)*
def _parse_parameter_list(self) -> c_ast.ParamList:
    """Parse comma-separated parameter declarations.

    Stops before a ``, ...`` sequence, leaving it unconsumed.
    """
    head = self._parse_parameter_declaration()
    param_list = c_ast.ParamList([head], head.coord)
    while True:
        if self._peek_type() != "COMMA" or self._peek_type(2) == "ELLIPSIS":
            return param_list
        self._advance()
        param_list.params.append(self._parse_parameter_declaration())
- # BNF: parameter_declaration : declaration_specifiers declarator?
- # | declaration_specifiers abstract_declarator_opt
def _parse_parameter_declaration(self) -> c_ast.Node:
    """Parse one parameter: specifiers plus an optional declarator.

    When no type specifier was given, the type defaults to ``int``.
    Named declarators go through ``_build_declarations``; abstract or
    absent ones through ``_build_parameter_declaration``.
    """
    spec, _saw_type, spec_coord = self._parse_declaration_specifiers(
        allow_no_type=True
    )

    if not spec["type"]:
        spec["type"] = [c_ast.IdentifierType(["int"], coord=spec_coord)]

    if not self._starts_declarator():
        abstract = self._parse_abstract_declarator_opt()
        return self._build_parameter_declaration(spec, abstract, spec_coord)

    declarator, is_named = self._parse_any_declarator(
        allow_abstract=True, typeid_paren_as_abstract=True
    )
    if not is_named:
        return self._build_parameter_declaration(spec, declarator, spec_coord)

    built = self._build_declarations(
        spec=spec, decls=[{"decl": declarator, "init": None, "bitsize": None}]
    )
    return built[0]
def _build_parameter_declaration(
    self, spec: "_DeclSpec", decl: Optional[c_ast.Node], spec_coord: Optional[Coord]
) -> c_ast.Node:
    """Build the node for a parameter that has no named declarator.

    When there are several type specifiers and the last one is a single
    name for which ``_is_type_in_scope`` is true, the parameter is built
    through ``_build_declarations``. Otherwise a ``Typename`` is
    produced and passed to ``_fix_decl_name_type``.
    """
    type_specs = spec["type"]
    if len(type_specs) > 1:
        trailing_names = type_specs[-1].names
        if len(trailing_names) == 1 and self._is_type_in_scope(trailing_names[0]):
            built = self._build_declarations(
                spec=spec, decls=[{"decl": decl, "init": None, "bitsize": None}]
            )
            return built[0]

    typename = c_ast.Typename(
        name="",
        quals=spec["qual"],
        align=None,
        type=decl or c_ast.TypeDecl(None, None, None, None),
        coord=spec_coord,
    )
    return self._fix_decl_name_type(typename, spec["type"])
- # BNF: identifier_list_opt : identifier_list | empty
def _parse_identifier_list_opt(self) -> Optional[c_ast.Node]:
    """Parse an identifier list, or return None when ')' comes next."""
    at_close = self._peek_type() == "RPAREN"
    return None if at_close else self._parse_identifier_list()
- # BNF: identifier_list : identifier (',' identifier)*
def _parse_identifier_list(self) -> c_ast.Node:
    """Parse comma-separated identifiers into a ``ParamList``.

    The list's coord is that of the first identifier.
    """
    head = self._parse_identifier()
    id_list = c_ast.ParamList([head], head.coord)
    while self._accept("COMMA"):
        id_list.params.append(self._parse_identifier())
    return id_list
- # ------------------------------------------------------------------
- # Abstract declarators
- # ------------------------------------------------------------------
- # BNF: type_name : specifier_qualifier_list abstract_declarator_opt
def _parse_type_name(self) -> c_ast.Typename:
    """Parse a type name: specifier list plus optional abstract declarator.

    The coord comes from the abstract declarator when there is one,
    otherwise from the first type specifier (None if there is none).
    """
    spec = self._parse_specifier_qualifier_list()
    abstract = self._parse_abstract_declarator_opt()

    if abstract is not None:
        coord = abstract.coord
    elif spec["type"]:
        coord = spec["type"][0].coord
    else:
        coord = None

    node = c_ast.Typename(
        name="",
        quals=spec["qual"][:],
        align=None,
        type=abstract or c_ast.TypeDecl(None, None, None, None),
        coord=coord,
    )
    return cast(c_ast.Typename, self._fix_decl_name_type(node, spec["type"]))
- # BNF: abstract_declarator_opt : pointer? direct_abstract_declarator?
def _parse_abstract_declarator_opt(self) -> Optional[c_ast.Node]:
    """Parse an optional abstract declarator.

    Returns None when neither a pointer nor a direct abstract declarator
    starts here. A pointer with nothing after it is applied to an empty
    ``TypeDecl``.
    """
    if self._peek_type() != "TIMES":
        if self._starts_direct_abstract_declarator():
            return self._parse_direct_abstract_declarator()
        return None

    pointer = self._parse_pointer()
    if self._starts_direct_abstract_declarator():
        base = self._parse_direct_abstract_declarator()
    else:
        base = c_ast.TypeDecl(None, None, None, None)
    assert pointer is not None
    return self._type_modify_decl(base, pointer)
- # BNF: direct_abstract_declarator : '(' parameter_type_list_opt ')'
- # | '(' abstract_declarator ')'
- # | '[' ... ']'
def _parse_direct_abstract_declarator(self) -> c_ast.Node:
    """Parse a direct abstract declarator and any following suffixes.

    After '(' the content is taken as a parameter list when it starts a
    declaration or is empty, and as a nested abstract declarator
    otherwise. A leading '[' starts an abstract array declarator.
    """
    open_tok = self._accept("LPAREN")
    if not open_tok:
        if self._peek_type() == "LBRACKET":
            base = self._parse_abstract_array_base()
        else:
            self._parse_error("Invalid abstract declarator", self.clex.filename)
    elif self._starts_declaration() or self._peek_type() == "RPAREN":
        params = self._parse_parameter_type_list_opt()
        self._expect("RPAREN")
        base = c_ast.FuncDecl(
            args=params,
            type=c_ast.TypeDecl(None, None, None, None),
            coord=self._tok_coord(open_tok),
        )
    else:
        base = self._parse_abstract_declarator_opt()
        self._expect("RPAREN")
        assert base is not None

    return self._parse_decl_suffixes(base)
- # BNF: parameter_type_list_opt : parameter_type_list | empty
def _parse_parameter_type_list_opt(self) -> Optional[c_ast.ParamList]:
    """Parse a parameter type list, or return None when ')' comes next."""
    at_close = self._peek_type() == "RPAREN"
    return None if at_close else self._parse_parameter_type_list()
- # BNF: abstract_array_base : '[' array_specifiers? assignment_expression? ']'
def _parse_abstract_array_base(self) -> c_ast.Node:
    """Parse an abstract array declarator over an empty ``TypeDecl``.

    No coord is passed, so the array node takes the '[' token's coord.
    """
    empty_base = c_ast.TypeDecl(None, None, None, None)
    return self._parse_array_decl_common(base_type=empty_base, coord=None)
- # ------------------------------------------------------------------
- # Statements
- # ------------------------------------------------------------------
- # BNF: statement : labeled_statement | compound_statement
- # | selection_statement | iteration_statement
- # | jump_statement | expression_statement
- # | static_assert | pppragma_directive
def _parse_statement(self) -> c_ast.Node | List[c_ast.Node]:
    """Dispatch to the statement parser selected by the next token.

    Anything not matched by a keyword, '{', a label, a pragma or a
    static assert is parsed as an expression statement.
    """
    upcoming = self._peek_type()

    if upcoming in ("CASE", "DEFAULT"):
        return self._parse_labeled_statement()
    if upcoming == "ID" and self._peek_type(2) == "COLON":
        return self._parse_labeled_statement()
    if upcoming == "LBRACE":
        return self._parse_compound_statement()
    if upcoming in ("IF", "SWITCH"):
        return self._parse_selection_statement()
    if upcoming in ("WHILE", "DO", "FOR"):
        return self._parse_iteration_statement()
    if upcoming in ("GOTO", "BREAK", "CONTINUE", "RETURN"):
        return self._parse_jump_statement()
    if upcoming in ("PPPRAGMA", "_PRAGMA"):
        return self._parse_pppragma_directive()
    if upcoming == "_STATIC_ASSERT":
        return self._parse_static_assert()
    return self._parse_expression_statement()
- # BNF: pragmacomp_or_statement : pppragma_directive* statement
def _parse_pragmacomp_or_statement(self) -> c_ast.Node | List[c_ast.Node]:
    """Parse a statement, optionally preceded by pragma directives.

    Leading pragmas and the statement are wrapped together in a
    ``Compound`` whose coord is that of the first pragma.
    """
    if self._peek_type() not in {"PPPRAGMA", "_PRAGMA"}:
        return self._parse_statement()

    leading = self._parse_pppragma_directive_list()
    body = self._parse_statement()
    return c_ast.Compound(block_items=leading + [body], coord=leading[0].coord)
- # BNF: block_item : declaration | statement
def _parse_block_item(self) -> c_ast.Node | List[c_ast.Node]:
    """Parse a declaration if one starts here, otherwise a statement."""
    if not self._starts_declaration():
        return self._parse_statement()
    return self._parse_declaration()
- # BNF: block_item_list : block_item+
def _parse_block_item_list(self) -> List[c_ast.Node]:
    """Parse block items until '}' or end of input.

    List results are flattened into the output; a result equal to
    ``[None]`` is dropped.
    """
    collected = []
    while self._peek_type() not in {"RBRACE", None}:
        parsed = self._parse_block_item()
        if not isinstance(parsed, list):
            collected.append(parsed)
            continue
        if parsed == [None]:
            continue
        collected.extend(parsed)
    return collected
- # BNF: compound_statement : '{' block_item_list? '}'
def _parse_compound_statement(self) -> c_ast.Node:
    """Parse ``{ ... }`` into a ``Compound`` node.

    An empty block yields ``block_items=None``. The coord is that of the
    opening brace.
    """
    open_tok = self._expect("LBRACE")
    if self._accept("RBRACE"):
        contents = None
    else:
        contents = self._parse_block_item_list()
        self._expect("RBRACE")
    return c_ast.Compound(block_items=contents, coord=self._tok_coord(open_tok))
- # BNF: labeled_statement : ID ':' statement
- # | CASE constant_expression ':' statement
- # | DEFAULT ':' statement
def _parse_labeled_statement(self) -> c_ast.Node:
    """Parse ``ID:``, ``case expr:`` or ``default:`` and its statement.

    When no statement starts after the ':', an ``EmptyStatement``
    carrying the label token's coord is used as the body.
    """
    label_kind = self._peek_type()
    if label_kind in ("ID", "CASE", "DEFAULT"):
        head_tok = self._advance()
        case_expr = None
        if label_kind == "CASE":
            case_expr = self._parse_constant_expression()
        self._expect("COLON")

        if self._starts_statement():
            body = self._parse_pragmacomp_or_statement()
        else:
            body = c_ast.EmptyStatement(self._tok_coord(head_tok))

        coord = self._tok_coord(head_tok)
        if label_kind == "ID":
            return c_ast.Label(head_tok.value, body, coord)
        if label_kind == "CASE":
            return c_ast.Case(case_expr, [body], coord)
        return c_ast.Default([body], coord)

    self._parse_error("Invalid labeled statement", self.clex.filename)
- # BNF: selection_statement : IF '(' expression ')' statement (ELSE statement)?
- # | SWITCH '(' expression ')' statement
def _parse_selection_statement(self) -> c_ast.Node:
    """Parse an ``if``/``else`` or ``switch`` statement.

    A switch node is passed through ``fix_switch_cases`` before being
    returned.
    """
    keyword_tok = self._advance()

    if keyword_tok.type == "IF":
        self._expect("LPAREN")
        condition = self._parse_expression()
        self._expect("RPAREN")
        then_branch = self._parse_pragmacomp_or_statement()
        else_branch = None
        if self._accept("ELSE"):
            else_branch = self._parse_pragmacomp_or_statement()
        return c_ast.If(
            condition, then_branch, else_branch, self._tok_coord(keyword_tok)
        )

    if keyword_tok.type == "SWITCH":
        self._expect("LPAREN")
        subject = self._parse_expression()
        self._expect("RPAREN")
        body = self._parse_pragmacomp_or_statement()
        return fix_switch_cases(
            c_ast.Switch(subject, body, self._tok_coord(keyword_tok))
        )

    self._parse_error("Invalid selection statement", self._tok_coord(keyword_tok))
- # BNF: iteration_statement : WHILE '(' expression ')' statement
- # | DO statement WHILE '(' expression ')' ';'
- # | FOR '(' (declaration | expression_opt) ';'
- # expression_opt ';' expression_opt ')' statement
def _parse_iteration_statement(self) -> c_ast.Node:
    """Parse a ``while``, ``do ... while`` or ``for`` statement.

    A ``for`` initialiser that is a declaration is wrapped in a
    ``DeclList``; ``_parse_declaration`` consumes its own ';'.
    """
    keyword_tok = self._advance()
    kind = keyword_tok.type

    if kind == "WHILE":
        self._expect("LPAREN")
        condition = self._parse_expression()
        self._expect("RPAREN")
        body = self._parse_pragmacomp_or_statement()
        return c_ast.While(condition, body, self._tok_coord(keyword_tok))

    if kind == "DO":
        body = self._parse_pragmacomp_or_statement()
        self._expect("WHILE")
        self._expect("LPAREN")
        condition = self._parse_expression()
        self._expect("RPAREN")
        self._expect("SEMI")
        return c_ast.DoWhile(condition, body, self._tok_coord(keyword_tok))

    if kind == "FOR":
        self._expect("LPAREN")
        if self._starts_declaration():
            init = c_ast.DeclList(
                self._parse_declaration(), self._tok_coord(keyword_tok)
            )
        else:
            init = self._parse_expression_opt()
            self._expect("SEMI")
        condition = self._parse_expression_opt()
        self._expect("SEMI")
        step = self._parse_expression_opt()
        self._expect("RPAREN")
        body = self._parse_pragmacomp_or_statement()
        return c_ast.For(init, condition, step, body, self._tok_coord(keyword_tok))

    self._parse_error("Invalid iteration statement", self._tok_coord(keyword_tok))
- # BNF: jump_statement : GOTO ID ';' | BREAK ';' | CONTINUE ';'
- # | RETURN expression? ';'
def _parse_jump_statement(self) -> c_ast.Node:
    """Parse ``goto``, ``break``, ``continue`` or ``return``.

    Each form is terminated by ';'. ``return`` without an expression
    yields a ``Return`` node whose expression is None.
    """
    keyword_tok = self._advance()
    kind = keyword_tok.type

    if kind == "GOTO":
        target_tok = self._expect("ID")
        self._expect("SEMI")
        return c_ast.Goto(target_tok.value, self._tok_coord(keyword_tok))

    if kind == "BREAK":
        self._expect("SEMI")
        return c_ast.Break(self._tok_coord(keyword_tok))

    if kind == "CONTINUE":
        self._expect("SEMI")
        return c_ast.Continue(self._tok_coord(keyword_tok))

    if kind == "RETURN":
        if self._accept("SEMI"):
            value = None
        else:
            value = self._parse_expression()
            self._expect("SEMI")
        return c_ast.Return(value, self._tok_coord(keyword_tok))

    self._parse_error("Invalid jump statement", self._tok_coord(keyword_tok))
- # BNF: expression_statement : expression_opt ';'
def _parse_expression_statement(self) -> c_ast.Node:
    """Parse an optional expression followed by ';'.

    A lone ';' yields an ``EmptyStatement`` at the semicolon's coord.
    """
    expr = self._parse_expression_opt()
    terminator = self._expect("SEMI")
    if expr is not None:
        return expr
    return c_ast.EmptyStatement(self._tok_coord(terminator))
- # ------------------------------------------------------------------
- # Expressions
- # ------------------------------------------------------------------
- # BNF: expression_opt : expression | empty
def _parse_expression_opt(self) -> Optional[c_ast.Node]:
    """Parse an expression if one starts here; otherwise return None."""
    return self._parse_expression() if self._starts_expression() else None
- # BNF: expression : assignment_expression (',' assignment_expression)*
def _parse_expression(self) -> c_ast.Node:
    """Parse a comma-separated sequence of assignment expressions.

    A single expression is returned as is; two or more are wrapped in an
    ``ExprList`` whose coord is that of the first expression.
    """
    first = self._parse_assignment_expression()
    operands = [first]
    while self._accept("COMMA"):
        operands.append(self._parse_assignment_expression())

    if len(operands) == 1:
        return first
    return c_ast.ExprList(operands, first.coord)
- # BNF: assignment_expression : conditional_expression
- # | unary_expression assignment_op assignment_expression
def _parse_assignment_expression(self) -> c_ast.Node:
    """Parse an assignment expression (right-associative).

    A '(' immediately followed by '{' is handled first: the enclosed
    compound statement is parsed and returned as the expression.
    """
    if self._peek_type() == "LPAREN" and self._peek_type(2) == "LBRACE":
        self._advance()
        block = self._parse_compound_statement()
        self._expect("RPAREN")
        return block

    target = self._parse_conditional_expression()
    if not self._is_assignment_op():
        return target

    operator = self._advance().value
    value = self._parse_assignment_expression()
    return c_ast.Assignment(operator, target, value, target.coord)
- # BNF: conditional_expression : binary_expression
- # | binary_expression '?' expression ':' conditional_expression
def _parse_conditional_expression(self) -> c_ast.Node:
    """Parse a binary expression with an optional ``? :`` tail.

    The false branch is itself a conditional expression, so chained
    conditionals nest to the right.
    """
    condition = self._parse_binary_expression()
    if not self._accept("CONDOP"):
        return condition

    when_true = self._parse_expression()
    self._expect("COLON")
    when_false = self._parse_conditional_expression()
    return c_ast.TernaryOp(condition, when_true, when_false, condition.coord)
- # BNF: binary_expression : cast_expression (binary_op cast_expression)*
def _parse_binary_expression(
    self, min_prec: int = 0, lhs: Optional[c_ast.Node] = None
) -> c_ast.Node:
    """Parse binary operators guided by the ``_BINARY_PRECEDENCE`` table.

    min_prec:
        Operators whose precedence is below this value are left
        unconsumed.
    lhs:
        An already parsed left operand; when None a cast expression is
        parsed first.
    """

    def peek_binary_op():
        # Return (token, precedence) for an upcoming binary operator,
        # or None when the next token is not one.
        tok = self._peek()
        if tok is None or tok.type not in _BINARY_PRECEDENCE:
            return None
        return tok, _BINARY_PRECEDENCE[tok.type]

    left = self._parse_cast_expression() if lhs is None else lhs

    while (found := peek_binary_op()) is not None:
        op_tok, prec = found
        if prec < min_prec:
            break

        self._advance()
        right = self._parse_cast_expression()

        # Operators that bind tighter than the current one take the
        # right operand first.
        while (ahead := peek_binary_op()) is not None and ahead[1] > prec:
            right = self._parse_binary_expression(ahead[1], right)

        left = c_ast.BinaryOp(op_tok.value, left, right, left.coord)

    return left
- # BNF: cast_expression : '(' type_name ')' cast_expression
- # | unary_expression
def _parse_cast_expression(self) -> c_ast.Node:
    """Parse a cast expression or fall through to a unary expression.

    A parenthesised type name followed by LBRACE is a compound literal,
    e.g. ``(int){1}``: the stream is rewound so the postfix parser can
    handle it. Any other parenthesised type name, e.g. ``(int) x``, is a
    cast whose coordinate is that of the opening parenthesis.
    """
    paren_type = self._try_parse_paren_type_name()
    if paren_type is None:
        return self._parse_unary_expression()

    type_node, start_mark, open_paren = paren_type
    if self._peek_type() != "LBRACE":
        operand = self._parse_cast_expression()
        return c_ast.Cast(type_node, operand, self._tok_coord(open_paren))

    # Compound literal: undo the speculative type-name parse.
    self._reset(start_mark)
    return self._parse_unary_expression()
- # BNF: unary_expression : postfix_expression
- # | '++' unary_expression
- # | '--' unary_expression
- # | unary_op cast_expression
- # | 'sizeof' unary_expression
- # | 'sizeof' '(' type_name ')'
- # | '_Alignof' '(' type_name ')'
def _parse_unary_expression(self) -> c_ast.Node:
    """Parse a unary expression.

    Dispatches on the next token type:

    * PLUSPLUS / MINUSMINUS: prefix operator applied to a unary expression.
    * AND, TIMES, PLUS, MINUS, NOT, LNOT: operator applied to a cast
      expression.
    * SIZEOF: applied either to a parenthesised type name or to a unary
      expression.
    * _ALIGNOF: applied to a parenthesised type name.
    * anything else: a postfix expression.

    For the prefix and unary operators the resulting ``c_ast.UnaryOp`` takes
    the operand's coordinate; for SIZEOF and _ALIGNOF it takes the keyword
    token's coordinate.
    """
    tok_type = self._peek_type()
    if tok_type in {"PLUSPLUS", "MINUSMINUS"}:
        tok = self._advance()
        expr = self._parse_unary_expression()
        return c_ast.UnaryOp(tok.value, expr, expr.coord)
    if tok_type in {"AND", "TIMES", "PLUS", "MINUS", "NOT", "LNOT"}:
        tok = self._advance()
        expr = self._parse_cast_expression()
        return c_ast.UnaryOp(tok.value, expr, expr.coord)
    if tok_type == "SIZEOF":
        tok = self._advance()
        result = self._try_parse_paren_type_name()
        if result is not None:
            typ, mark, _ = result
            if self._peek_type() != "LBRACE":
                return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
            # `sizeof (type){...}`: the operand is a compound literal, not a
            # bare type name. Rewind (as the cast parser does) so the postfix
            # parser consumes the whole literal and any trailing suffixes.
            self._reset(mark)
        expr = self._parse_unary_expression()
        return c_ast.UnaryOp(tok.value, expr, self._tok_coord(tok))
    if tok_type == "_ALIGNOF":
        tok = self._advance()
        self._expect("LPAREN")
        typ = self._parse_type_name()
        self._expect("RPAREN")
        return c_ast.UnaryOp(tok.value, typ, self._tok_coord(tok))
    return self._parse_postfix_expression()
- # BNF: postfix_expression : primary_expression postfix_suffix*
- # | '(' type_name ')' '{' initializer_list ','? '}'
def _parse_postfix_expression(self) -> c_ast.Node:
    """Parse a compound literal, or a primary expression with suffixes.

    A parenthesised type name followed by LBRACE yields a
    ``c_ast.CompoundLiteral``; if no LBRACE follows, the stream is rewound
    and the input is parsed as a primary expression. Suffixes (subscript,
    call, member access, postfix ``++``/``--``) are then applied left to
    right, each new node reusing the coordinate of the node it wraps.
    """
    paren_type = self._try_parse_paren_type_name()
    if paren_type is not None:
        type_node, start_mark, _ = paren_type
        # (int) {1} -> compound literal; (int) x -> not ours, rewind.
        if self._accept("LBRACE"):
            initializers = self._parse_initializer_list()
            self._accept("COMMA")
            self._expect("RBRACE")
            return c_ast.CompoundLiteral(type_node, initializers)
        self._reset(start_mark)

    node = self._parse_primary_expression()
    while True:
        if self._accept("LBRACKET"):
            index = self._parse_expression()
            self._expect("RBRACKET")
            node = c_ast.ArrayRef(node, index, node.coord)
        elif self._accept("LPAREN"):
            if self._peek_type() == "RPAREN":
                # Empty argument list is represented by None.
                self._advance()
                call_args = None
            else:
                call_args = self._parse_argument_expression_list()
                self._expect("RPAREN")
            node = c_ast.FuncCall(node, call_args, node.coord)
        elif self._peek_type() in {"PERIOD", "ARROW"}:
            access_tok = self._advance()
            member_tok = self._advance()
            if member_tok.type not in {"ID", "TYPEID"}:
                self._parse_error(
                    "Invalid struct reference", self._tok_coord(member_tok)
                )
            member = c_ast.ID(member_tok.value, self._tok_coord(member_tok))
            node = c_ast.StructRef(node, access_tok.value, member, node.coord)
        elif self._peek_type() in {"PLUSPLUS", "MINUSMINUS"}:
            step_tok = self._advance()
            # Postfix forms are distinguished from prefix ones by a "p" prefix.
            node = c_ast.UnaryOp("p" + step_tok.value, node, node.coord)
        else:
            break
    return node
- # BNF: primary_expression : ID | constant | string_literal
- # | '(' expression ')' | offsetof
def _parse_primary_expression(self) -> c_ast.Node:
    """Parse an identifier, constant, string, parenthesised expression or
    ``offsetof`` call.

    ``offsetof(type, designator)`` is represented as a ``c_ast.FuncCall``
    whose callee is an ID built from the keyword token and whose arguments
    are the type name and the member designator. Any other token is
    reported through ``_parse_error`` as "Invalid expression".
    """
    kind = self._peek_type()
    if kind == "ID":
        return self._parse_identifier()
    if any(kind in group for group in (_INT_CONST, _FLOAT_CONST, _CHAR_CONST)):
        return self._parse_constant()
    if kind in _STRING_LITERAL:
        return self._parse_unified_string_literal()
    if kind in _WSTR_LITERAL:
        return self._parse_unified_wstring_literal()
    if kind == "LPAREN":
        self._advance()
        inner = self._parse_expression()
        self._expect("RPAREN")
        return inner
    if kind == "OFFSETOF":
        keyword = self._advance()
        self._expect("LPAREN")
        type_node = self._parse_type_name()
        self._expect("COMMA")
        member = self._parse_offsetof_member_designator()
        self._expect("RPAREN")
        where = self._tok_coord(keyword)
        callee = c_ast.ID(keyword.value, where)
        arguments = c_ast.ExprList([type_node, member], where)
        return c_ast.FuncCall(callee, arguments, where)
    self._parse_error("Invalid expression", self.clex.filename)
- # BNF: offsetof_member_designator : identifier_or_typeid
- # ('.' identifier_or_typeid | '[' expression ']')*
def _parse_offsetof_member_designator(self) -> c_ast.Node:
    """Parse the member path given as second argument of ``offsetof``.

    Starts from an identifier (or typedef name) and applies any number of
    ``.name`` and ``[expr]`` steps, producing nested ``c_ast.StructRef`` /
    ``c_ast.ArrayRef`` nodes that all share the first identifier's
    coordinate.
    """
    path = self._parse_identifier_or_typeid()
    while True:
        if self._accept("PERIOD"):
            member = self._parse_identifier_or_typeid()
            path = c_ast.StructRef(path, ".", member, path.coord)
        elif self._accept("LBRACKET"):
            index = self._parse_expression()
            self._expect("RBRACKET")
            path = c_ast.ArrayRef(path, index, path.coord)
        else:
            return path
- # BNF: argument_expression_list : assignment_expression (',' assignment_expression)*
def _parse_argument_expression_list(self) -> c_ast.Node:
    """Parse one or more comma-separated assignment expressions.

    Always returns a ``c_ast.ExprList`` (even for a single argument),
    located at the first argument.
    """
    arguments = [self._parse_assignment_expression()]
    while self._accept("COMMA"):
        arguments.append(self._parse_assignment_expression())
    return c_ast.ExprList(arguments, arguments[0].coord)
- # BNF: constant_expression : conditional_expression
def _parse_constant_expression(self) -> c_ast.Node:
    """Parse a constant expression.

    Syntactically this is just a conditional expression; no check that the
    value is actually constant is performed here.
    """
    node = self._parse_conditional_expression()
    return node
- # ------------------------------------------------------------------
- # Terminals
- # ------------------------------------------------------------------
- # BNF: identifier : ID
def _parse_identifier(self) -> c_ast.Node:
    """Require an ID token and wrap it in a ``c_ast.ID`` node."""
    id_tok = self._expect("ID")
    name = id_tok.value
    return c_ast.ID(name, self._tok_coord(id_tok))
- # BNF: identifier_or_typeid : ID | TYPEID
def _parse_identifier_or_typeid(self) -> c_ast.Node:
    """Consume one token that must be an ID or TYPEID and return it as a
    ``c_ast.ID``; any other token type is reported as "Expected identifier".
    """
    name_tok = self._advance()
    is_name = name_tok.type == "ID" or name_tok.type == "TYPEID"
    if not is_name:
        self._parse_error("Expected identifier", self._tok_coord(name_tok))
    return c_ast.ID(name_tok.value, self._tok_coord(name_tok))
- # BNF: constant : INT_CONST | FLOAT_CONST | CHAR_CONST
def _parse_constant(self) -> c_ast.Node:
    """Consume one constant token and wrap it in a ``c_ast.Constant``.

    Integer constants get a type string built from their ``u``/``U`` and
    ``l``/``L`` suffix letters (for example ``"unsigned long int"``).
    Floating constants are typed ``"float"``, ``"long double"`` or
    ``"double"`` depending on their last character. Character constants
    are typed ``"char"``. Any other token is reported as "Invalid constant".

    Raises:
        ValueError: If an integer constant carries more than one ``u``/``U``
            or more than two ``l``/``L`` suffix letters.
    """
    tok = self._advance()
    if tok.type in _INT_CONST:
        u_count = 0
        l_count = 0
        # NOTE(review): INT_CONST_CHAR is taken to be the lexer's quoted
        # multi-character constant (e.g. 'ul'); confirm against the lexer.
        # Its trailing letters are payload, not a type suffix, so counting
        # them would mistype 'ul' and reject 'uu' outright.
        if tok.type != "INT_CONST_CHAR":
            # A suffix is at most three letters long (e.g. "ull").
            for ch in tok.value[-3:]:
                if ch in ("l", "L"):
                    l_count += 1
                elif ch in ("u", "U"):
                    u_count += 1
        if u_count > 1:
            raise ValueError("Constant cannot have more than one u/U suffix.")
        if l_count > 2:
            raise ValueError("Constant cannot have more than two l/L suffix.")
        prefix = "unsigned " * u_count + "long " * l_count
        return c_ast.Constant(prefix + "int", tok.value, self._tok_coord(tok))
    if tok.type in _FLOAT_CONST:
        if tok.value[-1] in ("f", "F"):
            t = "float"
        elif tok.value[-1] in ("l", "L"):
            t = "long double"
        else:
            t = "double"
        return c_ast.Constant(t, tok.value, self._tok_coord(tok))
    if tok.type in _CHAR_CONST:
        return c_ast.Constant("char", tok.value, self._tok_coord(tok))
    self._parse_error("Invalid constant", self._tok_coord(tok))
- # BNF: unified_string_literal : STRING_LITERAL+
def _parse_unified_string_literal(self) -> c_ast.Node:
    """Parse one or more adjacent STRING_LITERAL tokens into one constant.

    The result is a ``c_ast.Constant`` of type "string" located at the
    first token. Each following literal is appended by dropping the last
    character of the accumulated text and the first character of the new
    token (the two adjoining quotes).
    """
    first = self._expect("STRING_LITERAL")
    literal = c_ast.Constant("string", first.value, self._tok_coord(first))
    while self._peek_type() == "STRING_LITERAL":
        following = self._advance()
        merged = literal.value[:-1] + following.value[1:]
        literal.value = merged
    return literal
- # BNF: unified_wstring_literal : WSTRING_LITERAL+
def _parse_unified_wstring_literal(self) -> c_ast.Node:
    """Parse one or more adjacent prefixed string literals into one constant.

    Accepts the token types listed in ``_WSTR_LITERAL``. The result is a
    ``c_ast.Constant`` of type "string" located at the first token and
    keeping the first token's prefix. A first token of any other type is
    reported as "Invalid string literal".
    """
    tok = self._advance()
    if tok.type not in _WSTR_LITERAL:
        self._parse_error("Invalid string literal", self._tok_coord(tok))
    node = c_ast.Constant("string", tok.value, self._tok_coord(tok))
    while self._peek_type() in _WSTR_LITERAL:
        tok2 = self._advance()
        # Drop the closing quote of the text so far, then append what follows
        # the opening quote of the next literal. Splitting at the first quote
        # (rather than skipping a fixed two characters) also handles the
        # two-character `u8` prefix.
        continuation = tok2.value.partition('"')[2]
        node.value = node.value.rstrip()[:-1] + continuation
    return node
- # ------------------------------------------------------------------
- # Initializers
- # ------------------------------------------------------------------
- # BNF: initializer : assignment_expression
- # | '{' initializer_list ','? '}'
- # | '{' '}'
def _parse_initializer(self) -> c_ast.Node:
    """Parse an initializer.

    Without a leading LBRACE this is an assignment expression. ``{}``
    yields an empty ``c_ast.InitList`` located at the opening brace.
    Otherwise the braced initializer list is returned, with an optional
    trailing COMMA accepted before the closing brace.
    """
    open_brace = self._accept("LBRACE")
    if not open_brace:
        return self._parse_assignment_expression()
    if self._accept("RBRACE"):
        return c_ast.InitList([], self._tok_coord(open_brace))
    braced = self._parse_initializer_list()
    self._accept("COMMA")
    self._expect("RBRACE")
    return braced
- # BNF: initializer_list : initializer_item (',' initializer_item)* ','?
def _parse_initializer_list(self) -> c_ast.Node:
    """Parse comma-separated initializer items into a ``c_ast.InitList``.

    A COMMA directly followed by RBRACE is treated as a trailing comma: it
    is consumed and the list ends, leaving the RBRACE for the caller. The
    list takes the coordinate of its first item.
    """
    first = self._parse_initializer_item()
    entries = [first]
    while self._accept("COMMA") and self._peek_type() != "RBRACE":
        entries.append(self._parse_initializer_item())
    return c_ast.InitList(entries, first.coord)
- # BNF: initializer_item : designation? initializer
def _parse_initializer_item(self) -> c_ast.Node:
    """Parse one initializer, optionally preceded by a designation.

    An item starting with LBRACKET or PERIOD carries a designation and is
    returned as a ``c_ast.NamedInitializer``; otherwise the bare
    initializer is returned.
    """
    is_designated = self._peek_type() in {"LBRACKET", "PERIOD"}
    designation = self._parse_designation() if is_designated else None
    value = self._parse_initializer()
    if designation is None:
        return value
    return c_ast.NamedInitializer(designation, value)
- # BNF: designation : designator_list '='
def _parse_designation(self) -> List[c_ast.Node]:
    """Parse a designator list followed by a mandatory EQUALS token and
    return the designators.
    """
    path = self._parse_designator_list()
    self._expect("EQUALS")
    return path
- # BNF: designator_list : designator+
def _parse_designator_list(self) -> List[c_ast.Node]:
    """Collect designators for as long as the next token is LBRACKET or
    PERIOD; returns an empty list when none is present.
    """
    collected = []
    while True:
        if self._peek_type() not in {"LBRACKET", "PERIOD"}:
            return collected
        collected.append(self._parse_designator())
- # BNF: designator : '[' constant_expression ']'
- # | '.' identifier_or_typeid
def _parse_designator(self) -> c_ast.Node:
    """Parse a single designator.

    ``[constant_expression]`` returns the index expression and ``.name``
    returns the member identifier. Anything else is reported through
    ``_parse_error`` as "Invalid designator".
    """
    if self._accept("LBRACKET"):
        index = self._parse_constant_expression()
        self._expect("RBRACKET")
        return index
    if self._accept("PERIOD"):
        member = self._parse_identifier_or_typeid()
        return member
    self._parse_error("Invalid designator", self.clex.filename)
- # ------------------------------------------------------------------
- # Preprocessor-like directives
- # ------------------------------------------------------------------
- # BNF: pp_directive : '#' ... (unsupported)
def _parse_pp_directive(self) -> NoReturn:
    """Consume a PPHASH token and report that directives are unsupported.

    Never returns normally: the error is raised through ``_parse_error``
    at the coordinate of the PPHASH token.
    """
    hash_tok = self._expect("PPHASH")
    where = self._tok_coord(hash_tok)
    self._parse_error("Directives not supported yet", where)
- # BNF: pppragma_directive : PPPRAGMA PPPRAGMASTR?
- # | _PRAGMA '(' string_literal ')'
def _parse_pppragma_directive(self) -> c_ast.Node:
    """Parse a pragma in either of its two spellings.

    * PPPRAGMA optionally followed by PPPRAGMASTR: yields a ``c_ast.Pragma``
      holding the string token's text (located at that token), or an empty
      string (located at the PPPRAGMA token) when no string follows.
    * _PRAGMA ``(`` string-literal ``)``: yields a ``c_ast.Pragma`` holding
      the parsed string-literal node, located at the opening parenthesis.

    Any other token is reported as "Invalid pragma".
    """
    kind = self._peek_type()
    if kind == "PPPRAGMA":
        pragma_tok = self._advance()
        if self._peek_type() != "PPPRAGMASTR":
            return c_ast.Pragma("", self._tok_coord(pragma_tok))
        text_tok = self._advance()
        return c_ast.Pragma(text_tok.value, self._tok_coord(text_tok))
    if kind == "_PRAGMA":
        self._advance()
        open_paren = self._expect("LPAREN")
        text = self._parse_unified_string_literal()
        self._expect("RPAREN")
        return c_ast.Pragma(text, self._tok_coord(open_paren))
    self._parse_error("Invalid pragma", self.clex.filename)
- # BNF: pppragma_directive_list : pppragma_directive+
def _parse_pppragma_directive_list(self) -> List[c_ast.Node]:
    """Parse consecutive pragma directives (PPPRAGMA or _PRAGMA) and return
    them in source order; the list is empty when none is present.
    """
    found = []
    while True:
        if self._peek_type() not in {"PPPRAGMA", "_PRAGMA"}:
            return found
        found.append(self._parse_pppragma_directive())
- # BNF: static_assert : _STATIC_ASSERT '(' constant_expression (',' string_literal)? ')'
def _parse_static_assert(self) -> List[c_ast.Node]:
    """Parse ``_Static_assert ( constant-expression [, string-literal] )``.

    Returns a one-element list holding a ``c_ast.StaticAssert`` located at
    the keyword token; the message is None when no string literal is given.
    """
    keyword = self._expect("_STATIC_ASSERT")
    self._expect("LPAREN")
    condition = self._parse_constant_expression()
    if self._accept("COMMA"):
        message = self._parse_unified_string_literal()
    else:
        message = None
    self._expect("RPAREN")
    assertion = c_ast.StaticAssert(condition, message, self._tok_coord(keyword))
    return [assertion]
# Token types of the assignment operators (plain and compound).
_ASSIGNMENT_OPS = {
    "EQUALS",
    "XOREQUAL",
    "TIMESEQUAL",
    "DIVEQUAL",
    "MODEQUAL",
    "PLUSEQUAL",
    "MINUSEQUAL",
    "LSHIFTEQUAL",
    "RSHIFTEQUAL",
    "ANDEQUAL",
    "OREQUAL",
}
# Precedence of binary operators, keyed by token type (lower number = weaker
# binding). Consulted by the binary-expression parser.
# If this changes, c_generator.CGenerator.precedence_map needs to change as
# well
_BINARY_PRECEDENCE = {
    "LOR": 0,
    "LAND": 1,
    "OR": 2,
    "XOR": 3,
    "AND": 4,
    "EQ": 5,
    "NE": 5,
    "GT": 6,
    "GE": 6,
    "LT": 6,
    "LE": 6,
    "RSHIFT": 7,
    "LSHIFT": 7,
    "PLUS": 8,
    "MINUS": 8,
    "TIMES": 9,
    "DIVIDE": 9,
    "MOD": 9,
}
# Token types grouped by the kind of declaration specifier they spell.
_STORAGE_CLASS = {"AUTO", "REGISTER", "STATIC", "EXTERN", "TYPEDEF", "_THREAD_LOCAL"}
_FUNCTION_SPEC = {"INLINE", "_NORETURN"}
_TYPE_QUALIFIER = {"CONST", "RESTRICT", "VOLATILE", "_ATOMIC"}
_TYPE_SPEC_SIMPLE = {
    "VOID",
    "_BOOL",
    "CHAR",
    "SHORT",
    "INT",
    "LONG",
    "FLOAT",
    "DOUBLE",
    "_COMPLEX",
    "SIGNED",
    "UNSIGNED",
    "__INT128",
}
# Union of the specifier groups above plus the remaining token types listed
# here. ("_ATOMIC" is already part of _TYPE_QUALIFIER; repeating it in a set
# union is harmless.)
_DECL_START = (
    _STORAGE_CLASS
    | _FUNCTION_SPEC
    | _TYPE_QUALIFIER
    | _TYPE_SPEC_SIMPLE
    | {"TYPEID", "STRUCT", "UNION", "ENUM", "_ALIGNAS", "_ATOMIC"}
)
# Non-literal token types included in _STARTS_EXPRESSION below.
_EXPR_START = {
    "ID",
    "LPAREN",
    "PLUSPLUS",
    "MINUSMINUS",
    "PLUS",
    "MINUS",
    "TIMES",
    "AND",
    "NOT",
    "LNOT",
    "SIZEOF",
    "_ALIGNOF",
    "OFFSETOF",
}
# Literal token types, grouped the way the primary-expression and constant
# parsers test them.
_INT_CONST = {
    "INT_CONST_DEC",
    "INT_CONST_OCT",
    "INT_CONST_HEX",
    "INT_CONST_BIN",
    "INT_CONST_CHAR",
}
_FLOAT_CONST = {"FLOAT_CONST", "HEX_FLOAT_CONST"}
_CHAR_CONST = {
    "CHAR_CONST",
    "WCHAR_CONST",
    "U8CHAR_CONST",
    "U16CHAR_CONST",
    "U32CHAR_CONST",
}
_STRING_LITERAL = {"STRING_LITERAL"}
_WSTR_LITERAL = {
    "WSTRING_LITERAL",
    "U8STRING_LITERAL",
    "U16STRING_LITERAL",
    "U32STRING_LITERAL",
}
# _EXPR_START plus every literal token type.
_STARTS_EXPRESSION = (
    _EXPR_START
    | _INT_CONST
    | _FLOAT_CONST
    | _CHAR_CONST
    | _STRING_LITERAL
    | _WSTR_LITERAL
)
# Statement keywords and punctuation token types.
_STARTS_STATEMENT = {
    "LBRACE",
    "IF",
    "SWITCH",
    "WHILE",
    "DO",
    "FOR",
    "GOTO",
    "BREAK",
    "CONTINUE",
    "RETURN",
    "CASE",
    "DEFAULT",
    "PPPRAGMA",
    "_PRAGMA",
    "_STATIC_ASSERT",
    "SEMI",
}
class _TokenStream:
    """Wraps a lexer to provide convenient, buffered access to the underlying
    token stream. The lexer is expected to be initialized with the input
    string already.

    Every token read from the lexer is kept in a buffer, so the stream can
    be rewound to any earlier position. End of input is represented by
    None, both in the buffer and in the values returned by `peek`/`next`.
    """

    def __init__(self, lexer: CLexer) -> None:
        self._lexer = lexer
        # All tokens read so far; a None entry marks end of input.
        self._buffer: List[Optional[_Token]] = []
        # Position in `_buffer` of the next token to be consumed.
        self._index = 0

    def peek(self, k: int = 1) -> Optional[_Token]:
        """Peek at the k-th next token in the stream, without consuming it.

        Returns None when k is not positive or when the k-th token lies at
        or beyond the end of input.

        Examples:
            k=1 returns the immediate next token.
            k=2 returns the token after that.
        """
        if k <= 0:
            return None
        self._fill(k)
        pos = self._index + k - 1
        # `_fill` stops reading at end of input, so the buffer may be shorter
        # than requested; anything past it is past end of input as well.
        if pos >= len(self._buffer):
            return None
        return self._buffer[pos]

    def next(self) -> Optional[_Token]:
        """Consume a single token and return it (None at end of input)."""
        self._fill(1)
        tok = self._buffer[self._index]
        self._index += 1
        return tok

    # The 'mark' and 'reset' methods are useful for speculative parsing with
    # backtracking; when the parser needs to examine a sequence of tokens
    # and potentially decide to try a different path on the same sequence, it
    # can call 'mark' to obtain the current token position, and if the first
    # path fails restore the position with `reset(pos)`.
    def mark(self) -> int:
        """Return the current position, for a later `reset`."""
        return self._index

    def reset(self, mark: int) -> None:
        """Rewind (or move) the stream to a position obtained from `mark`."""
        self._index = mark

    def _fill(self, n: int) -> None:
        """Try to buffer n tokens beyond the current position.

        Reading stops early once the lexer returns None, so fewer than n
        entries may be available afterwards; at least one is, whenever n >= 1.
        """
        while len(self._buffer) < self._index + n:
            tok = self._lexer.token()
            self._buffer.append(tok)
            if tok is None:
                break
- # Declaration specifiers are represented by a dictionary with entries:
- # - qual: a list of type qualifiers
- # - storage: a list of storage class specifiers
- # - type: a list of type specifiers
- # - function: a list of function specifiers
- # - alignment: a list of alignment specifiers
class _DeclSpec(TypedDict):
    """Declaration specifiers, bucketed by kind; every entry is a list."""

    qual: List[Any]  # type qualifiers
    storage: List[Any]  # storage class specifiers
    type: List[Any]  # type specifiers
    function: List[Any]  # function specifiers
    alignment: List[Any]  # alignment specifiers


# The valid keys of _DeclSpec, as a literal type.
_DeclSpecKind = Literal["qual", "storage", "type", "function", "alignment"]
class _DeclInfo(TypedDict):
    """One declarator together with its optional initializer and bit-field
    width.
    """

    # Declarator payloads used by declaration/initializer parsing:
    # - decl: the declarator node (may be None for abstract/implicit cases)
    # - init: optional initializer expression
    # - bitsize: optional bit-field width expression (for struct declarators)
    decl: Optional[c_ast.Node]
    init: Optional[c_ast.Node]
    bitsize: Optional[c_ast.Node]
|