parser.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. from itertools import chain
  2. from .ast import AtRule, Declaration, ParseError, QualifiedRule
  3. from .tokenizer import parse_component_value_list
  4. def _to_token_iterator(input, skip_comments=False):
  5. """Iterate component values out of string or component values iterable.
  6. :type input: :obj:`str` or :term:`iterable`
  7. :param input: A string or an iterable of :term:`component values`.
  8. :type skip_comments: :obj:`bool`
  9. :param skip_comments: If the input is a string, ignore all CSS comments.
  10. :returns: An iterator yielding :term:`component values`.
  11. """
  12. if isinstance(input, str):
  13. input = parse_component_value_list(input, skip_comments)
  14. return iter(input)
  15. def _next_significant(tokens):
  16. """Return the next significant (neither whitespace or comment) token.
  17. :type tokens: :term:`iterator`
  18. :param tokens: An iterator yielding :term:`component values`.
  19. :returns: A :term:`component value`, or :obj:`None`.
  20. """
  21. for token in tokens:
  22. if token.type not in ('whitespace', 'comment'):
  23. return token
  24. def parse_one_component_value(input, skip_comments=False):
  25. """Parse a single :diagram:`component value`.
  26. This is used e.g. for an attribute value
  27. referred to by ``attr(foo length)``.
  28. :type input: :obj:`str` or :term:`iterable`
  29. :param input: A string or an iterable of :term:`component values`.
  30. :type skip_comments: :obj:`bool`
  31. :param skip_comments: If the input is a string, ignore all CSS comments.
  32. :returns:
  33. A :term:`component value` (that is neither whitespace or comment),
  34. or a :class:`~tinycss2.ast.ParseError`.
  35. """
  36. tokens = _to_token_iterator(input, skip_comments)
  37. first = _next_significant(tokens)
  38. second = _next_significant(tokens)
  39. if first is None:
  40. return ParseError(1, 1, 'empty', 'Input is empty')
  41. if second is not None:
  42. return ParseError(
  43. second.source_line, second.source_column, 'extra-input',
  44. 'Got more than one token')
  45. else:
  46. return first
  47. def parse_one_declaration(input, skip_comments=False):
  48. """Parse a single :diagram:`declaration`.
  49. This is used e.g. for a declaration in an `@supports
  50. <https://drafts.csswg.org/css-conditional/#at-supports>`_ test.
  51. :type input: :obj:`str` or :term:`iterable`
  52. :param input: A string or an iterable of :term:`component values`.
  53. :type skip_comments: :obj:`bool`
  54. :param skip_comments: If the input is a string, ignore all CSS comments.
  55. :returns:
  56. A :class:`~tinycss2.ast.Declaration`
  57. or :class:`~tinycss2.ast.ParseError`.
  58. Any whitespace or comment before the ``:`` colon is dropped.
  59. """
  60. tokens = _to_token_iterator(input, skip_comments)
  61. first_token = _next_significant(tokens)
  62. if first_token is None:
  63. return ParseError(1, 1, 'empty', 'Input is empty')
  64. return _parse_declaration(first_token, tokens)
  65. def _consume_remnants(input, nested):
  66. for token in input:
  67. if token == ';':
  68. return
  69. elif nested and token == '}':
  70. return
  71. def _parse_declaration(first_token, tokens, nested=True):
  72. """Parse a declaration.
  73. Consume :obj:`tokens` until the end of the declaration or the first error.
  74. :type first_token: :term:`component value`
  75. :param first_token: The first component value of the rule.
  76. :type tokens: :term:`iterator`
  77. :param tokens: An iterator yielding :term:`component values`.
  78. :type nested: :obj:`bool`
  79. :param nested: Whether the declaration is nested or top-level.
  80. :returns:
  81. A :class:`~tinycss2.ast.Declaration`
  82. or :class:`~tinycss2.ast.ParseError`.
  83. """
  84. name = first_token
  85. if name.type != 'ident':
  86. _consume_remnants(tokens, nested)
  87. return ParseError(
  88. name.source_line, name.source_column, 'invalid',
  89. f'Expected <ident> for declaration name, got {name.type}.')
  90. colon = _next_significant(tokens)
  91. if colon is None:
  92. _consume_remnants(tokens, nested)
  93. return ParseError(
  94. name.source_line, name.source_column, 'invalid',
  95. "Expected ':' after declaration name, got EOF")
  96. elif colon != ':':
  97. _consume_remnants(tokens, nested)
  98. return ParseError(
  99. colon.source_line, colon.source_column, 'invalid',
  100. "Expected ':' after declaration name, got {colon.type}.")
  101. value = []
  102. state = 'value'
  103. contains_non_whitespace = False
  104. contains_simple_block = False
  105. for i, token in enumerate(tokens):
  106. if state == 'value' and token == '!':
  107. state = 'bang'
  108. bang_position = i
  109. elif (state == 'bang' and token.type == 'ident'
  110. and token.lower_value == 'important'):
  111. state = 'important'
  112. elif token.type not in ('whitespace', 'comment'):
  113. state = 'value'
  114. if token.type == '{} block':
  115. if contains_non_whitespace:
  116. contains_simple_block = True
  117. else:
  118. contains_non_whitespace = True
  119. else:
  120. contains_non_whitespace = True
  121. value.append(token)
  122. if state == 'important':
  123. del value[bang_position:]
  124. # TODO: Handle custom property names
  125. if contains_simple_block and contains_non_whitespace:
  126. return ParseError(
  127. colon.source_line, colon.source_column, 'invalid',
  128. 'Declaration contains {} block')
  129. # TODO: Handle unicode-range
  130. return Declaration(
  131. name.source_line, name.source_column, name.value, name.lower_value,
  132. value, state == 'important')
  133. def _consume_blocks_content(first_token, tokens):
  134. """Consume declaration or nested rule."""
  135. declaration_tokens = []
  136. semicolon_token = []
  137. if first_token != ';' and first_token.type != '{} block':
  138. for token in tokens:
  139. if token == ';':
  140. semicolon_token.append(token)
  141. break
  142. declaration_tokens.append(token)
  143. if token.type == '{} block':
  144. break
  145. declaration = _parse_declaration(
  146. first_token, iter(declaration_tokens), nested=True)
  147. if declaration.type == 'declaration':
  148. return declaration
  149. else:
  150. tokens = chain(declaration_tokens, semicolon_token, tokens)
  151. return _consume_qualified_rule(first_token, tokens, stop_token=';', nested=True)
  152. def _consume_declaration_in_list(first_token, tokens):
  153. """Like :func:`_parse_declaration`, but stop at the first ``;``.
  154. Deprecated, use :func:`_consume_blocks_content` instead.
  155. """
  156. other_declaration_tokens = []
  157. for token in tokens:
  158. if token == ';':
  159. break
  160. other_declaration_tokens.append(token)
  161. return _parse_declaration(first_token, iter(other_declaration_tokens))
  162. def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False):
  163. """Parse a block’s contents.
  164. This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
  165. of a style rule or ``@page`` rule, or for the ``style`` attribute of an
  166. HTML element.
  167. In contexts that don’t expect any at-rule and/or qualified rule,
  168. all :class:`~tinycss2.ast.AtRule` and/or
  169. :class:`~tinycss2.ast.QualifiedRule` objects should simply be rejected as
  170. invalid.
  171. :type input: :obj:`str` or :term:`iterable`
  172. :param input: A string or an iterable of :term:`component values`.
  173. :type skip_comments: :obj:`bool`
  174. :param skip_comments:
  175. Ignore CSS comments at the top-level of the list.
  176. If the input is a string, ignore all comments.
  177. :type skip_whitespace: :obj:`bool`
  178. :param skip_whitespace:
  179. Ignore whitespace at the top-level of the list.
  180. Whitespace is still preserved
  181. in the :attr:`~tinycss2.ast.Declaration.value` of declarations
  182. and the :attr:`~tinycss2.ast.AtRule.prelude`
  183. and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
  184. :returns:
  185. A list of
  186. :class:`~tinycss2.ast.Declaration`,
  187. :class:`~tinycss2.ast.AtRule`,
  188. :class:`~tinycss2.ast.QualifiedRule`,
  189. :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
  190. :class:`~tinycss2.ast.WhitespaceToken`
  191. (if ``skip_whitespace`` is false),
  192. and :class:`~tinycss2.ast.ParseError` objects
  193. """
  194. tokens = _to_token_iterator(input, skip_comments)
  195. result = []
  196. for token in tokens:
  197. if token.type == 'whitespace':
  198. if not skip_whitespace:
  199. result.append(token)
  200. elif token.type == 'comment':
  201. if not skip_comments:
  202. result.append(token)
  203. elif token.type == 'at-keyword':
  204. result.append(_consume_at_rule(token, tokens))
  205. elif token != ';':
  206. result.append(_consume_blocks_content(token, tokens))
  207. return result
  208. def parse_declaration_list(input, skip_comments=False, skip_whitespace=False):
  209. """Parse a :diagram:`declaration list` (which may also contain at-rules).
  210. Deprecated and removed from CSS Syntax Level 3. Use
  211. :func:`parse_blocks_contents` instead.
  212. This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
  213. of a style rule or ``@page`` rule, or for the ``style`` attribute of an
  214. HTML element.
  215. In contexts that don’t expect any at-rule, all
  216. :class:`~tinycss2.ast.AtRule` objects should simply be rejected as invalid.
  217. :type input: :obj:`str` or :term:`iterable`
  218. :param input: A string or an iterable of :term:`component values`.
  219. :type skip_comments: :obj:`bool`
  220. :param skip_comments:
  221. Ignore CSS comments at the top-level of the list.
  222. If the input is a string, ignore all comments.
  223. :type skip_whitespace: :obj:`bool`
  224. :param skip_whitespace:
  225. Ignore whitespace at the top-level of the list.
  226. Whitespace is still preserved
  227. in the :attr:`~tinycss2.ast.Declaration.value` of declarations
  228. and the :attr:`~tinycss2.ast.AtRule.prelude`
  229. and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
  230. :returns:
  231. A list of
  232. :class:`~tinycss2.ast.Declaration`,
  233. :class:`~tinycss2.ast.AtRule`,
  234. :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
  235. :class:`~tinycss2.ast.WhitespaceToken`
  236. (if ``skip_whitespace`` is false),
  237. and :class:`~tinycss2.ast.ParseError` objects
  238. """
  239. tokens = _to_token_iterator(input, skip_comments)
  240. result = []
  241. for token in tokens:
  242. if token.type == 'whitespace':
  243. if not skip_whitespace:
  244. result.append(token)
  245. elif token.type == 'comment':
  246. if not skip_comments:
  247. result.append(token)
  248. elif token.type == 'at-keyword':
  249. result.append(_consume_at_rule(token, tokens))
  250. elif token != ';':
  251. result.append(_consume_declaration_in_list(token, tokens))
  252. return result
  253. def parse_one_rule(input, skip_comments=False):
  254. """Parse a single :diagram:`qualified rule` or :diagram:`at-rule`.
  255. This would be used e.g. by `insertRule()
  256. <https://drafts.csswg.org/cssom/#dom-cssstylesheet-insertrule>`_
  257. in an implementation of CSSOM.
  258. :type input: :obj:`str` or :term:`iterable`
  259. :param input: A string or an iterable of :term:`component values`.
  260. :type skip_comments: :obj:`bool`
  261. :param skip_comments:
  262. If the input is a string, ignore all CSS comments.
  263. :returns:
  264. A :class:`~tinycss2.ast.QualifiedRule`,
  265. :class:`~tinycss2.ast.AtRule`,
  266. or :class:`~tinycss2.ast.ParseError` objects.
  267. Any whitespace or comment before or after the rule is dropped.
  268. """
  269. tokens = _to_token_iterator(input, skip_comments)
  270. first = _next_significant(tokens)
  271. if first is None:
  272. return ParseError(1, 1, 'empty', 'Input is empty')
  273. rule = _consume_rule(first, tokens)
  274. next = _next_significant(tokens)
  275. if next is not None:
  276. return ParseError(
  277. next.source_line, next.source_column, 'extra-input',
  278. 'Expected a single rule, got %s after the first rule.' % next.type)
  279. return rule
  280. def parse_rule_list(input, skip_comments=False, skip_whitespace=False):
  281. """Parse a non-top-level :diagram:`rule list`.
  282. Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_contents`
  283. instead.
  284. This is used for parsing the :attr:`~tinycss2.ast.AtRule.content`
  285. of nested rules like ``@media``.
  286. This differs from :func:`parse_stylesheet` in that
  287. top-level ``<!--`` and ``-->`` tokens are not ignored.
  288. :type input: :obj:`str` or :term:`iterable`
  289. :param input: A string or an iterable of :term:`component values`.
  290. :type skip_comments: :obj:`bool`
  291. :param skip_comments:
  292. Ignore CSS comments at the top-level of the list.
  293. If the input is a string, ignore all comments.
  294. :type skip_whitespace: :obj:`bool`
  295. :param skip_whitespace:
  296. Ignore whitespace at the top-level of the list.
  297. Whitespace is still preserved
  298. in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
  299. and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
  300. :returns:
  301. A list of
  302. :class:`~tinycss2.ast.QualifiedRule`,
  303. :class:`~tinycss2.ast.AtRule`,
  304. :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
  305. :class:`~tinycss2.ast.WhitespaceToken`
  306. (if ``skip_whitespace`` is false),
  307. and :class:`~tinycss2.ast.ParseError` objects.
  308. """
  309. tokens = _to_token_iterator(input, skip_comments)
  310. result = []
  311. for token in tokens:
  312. if token.type == 'whitespace':
  313. if not skip_whitespace:
  314. result.append(token)
  315. elif token.type == 'comment':
  316. if not skip_comments:
  317. result.append(token)
  318. else:
  319. result.append(_consume_rule(token, tokens))
  320. return result
  321. def parse_stylesheet(input, skip_comments=False, skip_whitespace=False):
  322. """Parse :diagram:`stylesheet` from text.
  323. This is used e.g. for a ``<style>`` HTML element.
  324. This differs from :func:`parse_rule_list` in that
  325. top-level ``<!--`` and ``-->`` tokens are ignored.
  326. This is a legacy quirk for the ``<style>`` HTML element.
  327. :type input: :obj:`str` or :term:`iterable`
  328. :param input: A string or an iterable of :term:`component values`.
  329. :type skip_comments: :obj:`bool`
  330. :param skip_comments:
  331. Ignore CSS comments at the top-level of the stylesheet.
  332. If the input is a string, ignore all comments.
  333. :type skip_whitespace: :obj:`bool`
  334. :param skip_whitespace:
  335. Ignore whitespace at the top-level of the stylesheet.
  336. Whitespace is still preserved
  337. in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
  338. and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
  339. :returns:
  340. A list of
  341. :class:`~tinycss2.ast.QualifiedRule`,
  342. :class:`~tinycss2.ast.AtRule`,
  343. :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
  344. :class:`~tinycss2.ast.WhitespaceToken`
  345. (if ``skip_whitespace`` is false),
  346. and :class:`~tinycss2.ast.ParseError` objects.
  347. """
  348. tokens = _to_token_iterator(input, skip_comments)
  349. result = []
  350. for token in tokens:
  351. if token.type == 'whitespace':
  352. if not skip_whitespace:
  353. result.append(token)
  354. elif token.type == 'comment':
  355. if not skip_comments:
  356. result.append(token)
  357. elif token not in ('<!--', '-->'):
  358. result.append(_consume_rule(token, tokens))
  359. return result
  360. def _consume_rule(first_token, tokens):
  361. """Parse a qualified rule or at-rule.
  362. Consume just enough of :obj:`tokens` for this rule.
  363. :type first_token: :term:`component value`
  364. :param first_token: The first component value of the rule.
  365. :type tokens: :term:`iterator`
  366. :param tokens: An iterator yielding :term:`component values`.
  367. :returns:
  368. A :class:`~tinycss2.ast.QualifiedRule`,
  369. :class:`~tinycss2.ast.AtRule`,
  370. or :class:`~tinycss2.ast.ParseError`.
  371. """
  372. if first_token.type == 'at-keyword':
  373. return _consume_at_rule(first_token, tokens)
  374. return _consume_qualified_rule(first_token, tokens)
  375. def _consume_at_rule(at_keyword, tokens):
  376. """Parse an at-rule.
  377. Consume just enough of :obj:`tokens` for this rule.
  378. :type at_keyword: :class:`AtKeywordToken`
  379. :param at_keyword: The at-rule keyword token starting this rule.
  380. :type tokens: :term:`iterator`
  381. :param tokens: An iterator yielding :term:`component values`.
  382. :type nested: :obj:`bool`
  383. :param nested: Whether the at-rule is nested or top-level.
  384. :returns:
  385. A :class:`~tinycss2.ast.QualifiedRule`,
  386. or :class:`~tinycss2.ast.ParseError`.
  387. """
  388. prelude = []
  389. content = None
  390. for token in tokens:
  391. if token.type == '{} block':
  392. # TODO: handle nested at-rules
  393. # https://drafts.csswg.org/css-syntax-3/#consume-at-rule
  394. content = token.content
  395. break
  396. elif token == ';':
  397. break
  398. prelude.append(token)
  399. return AtRule(
  400. at_keyword.source_line, at_keyword.source_column, at_keyword.value,
  401. at_keyword.lower_value, prelude, content)
  402. def _rule_error(token, name):
  403. """Create rule parse error raised because of given token."""
  404. return ParseError(
  405. token.source_line, token.source_column, 'invalid',
  406. f'{name} reached before {{}} block for a qualified rule.')
  407. def _consume_qualified_rule(first_token, tokens, nested=False,
  408. stop_token=None):
  409. """Consume a qualified rule.
  410. Consume just enough of :obj:`tokens` for this rule.
  411. :type first_token: :term:`component value`
  412. :param first_token: The first component value of the rule.
  413. :type tokens: :term:`iterator`
  414. :param tokens: An iterator yielding :term:`component values`.
  415. :type nested: :obj:`bool`
  416. :param nested: Whether the rule is nested or top-level.
  417. :type stop_token: :class:`~tinycss2.ast.Node`
  418. :param stop_token: A token that ends rule parsing when met.
  419. """
  420. if first_token == stop_token:
  421. return _rule_error(first_token, 'Stop token')
  422. if first_token.type == '{} block':
  423. prelude = []
  424. block = first_token
  425. else:
  426. prelude = [first_token]
  427. for token in tokens:
  428. if token == stop_token:
  429. return _rule_error(token, 'Stop token')
  430. if token.type == '{} block':
  431. block = token
  432. # TODO: handle special case for CSS variables (using "nested")
  433. # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule
  434. break
  435. prelude.append(token)
  436. else:
  437. return _rule_error(prelude[-1], 'EOF')
  438. return QualifiedRule(
  439. first_token.source_line, first_token.source_column, prelude, block.content)