actions.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. # actions.py
  2. from __future__ import annotations
  3. from typing import Union, Callable, Any
  4. from .exceptions import ParseException
  5. from .util import col, replaced_by_pep8
  6. from .results import ParseResults
# Type alias covering the call shapes a parse action may have: no arguments,
# the parsed tokens only, an int plus the tokens, or a str, an int and the
# tokens (the helpers in this module name these ``s``/``strg``, ``l``/``locn``
# and ``t``/``toks``).  The return type is left open (``Any``).
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
  13. class OnlyOnce:
  14. """
  15. Wrapper for parse actions, to ensure they are only called once.
  16. Note: parse action signature must include all 3 arguments.
  17. """
  18. def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None:
  19. from .core import _trim_arity
  20. self.callable = _trim_arity(method_call)
  21. self.called = False
  22. def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults:
  23. if not self.called:
  24. results = self.callable(s, l, t)
  25. self.called = True
  26. return results
  27. raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
  28. def reset(self):
  29. """
  30. Allow the associated parse action to be called once more.
  31. """
  32. self.called = False
  33. def match_only_at_col(n: int) -> ParseAction:
  34. """
  35. Helper method for defining parse actions that require matching at
  36. a specific column in the input text.
  37. """
  38. def verify_col(strg: str, locn: int, toks: ParseResults) -> None:
  39. if col(locn, strg) != n:
  40. raise ParseException(strg, locn, f"matched token not at column {n}")
  41. return verify_col
  42. def replace_with(repl_str: Any) -> ParseAction:
  43. """
  44. Helper method for common parse actions that simply return
  45. a literal value. Especially useful when used with
  46. :meth:`~ParserElement.transform_string`.
  47. Example:
  48. .. doctest::
  49. >>> num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
  50. >>> na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
  51. >>> term = na | num
  52. >>> term[1, ...].parse_string("324 234 N/A 234")
  53. ParseResults([324, 234, nan, 234], {})
  54. """
  55. return lambda s, l, t: [repl_str]
  56. def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
  57. r"""
  58. Helper parse action for removing quotation marks from parsed
  59. quoted strings, that use a single character for quoting. For parsing
  60. strings that may have multiple characters, use the :class:`QuotedString`
  61. class.
  62. Example:
  63. .. doctest::
  64. >>> # by default, quotation marks are included in parsed results
  65. >>> quoted_string.parse_string("'Now is the Winter of our Discontent'")
  66. ParseResults(["'Now is the Winter of our Discontent'"], {})
  67. >>> # use remove_quotes to strip quotation marks from parsed results
  68. >>> dequoted = quoted_string().set_parse_action(remove_quotes)
  69. >>> dequoted.parse_string("'Now is the Winter of our Discontent'")
  70. ParseResults(['Now is the Winter of our Discontent'], {})
  71. """
  72. return t[0][1:-1]
  73. def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
  74. """
  75. Helper to create a validating parse action to be used with start
  76. tags created with :class:`make_xml_tags` or
  77. :class:`make_html_tags`. Use ``with_attribute`` to qualify
  78. a starting tag with a required attribute value, to avoid false
  79. matches on common tags such as ``<TD>`` or ``<DIV>``.
  80. Call ``with_attribute`` with a series of attribute names and
  81. values. Specify the list of filter attributes names and values as:
  82. - keyword arguments, as in ``(align="right")``, or
  83. - as an explicit dict with ``**`` operator, when an attribute
  84. name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
  85. - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
  86. For attribute names with a namespace prefix, you must use the second
  87. form. Attribute names are matched insensitive to upper/lower case.
  88. If just testing for ``class`` (with or without a namespace), use
  89. :class:`with_class`.
  90. To verify that the attribute exists, but without specifying a value,
  91. pass ``with_attribute.ANY_VALUE`` as the value.
  92. The next two examples use the following input data and tag parsers:
  93. .. testcode::
  94. html = '''
  95. <div>
  96. Some text
  97. <div type="grid">1 4 0 1 0</div>
  98. <div type="graph">1,3 2,3 1,1</div>
  99. <div>this has no type</div>
  100. </div>
  101. '''
  102. div,div_end = make_html_tags("div")
  103. Only match div tag having a type attribute with value "grid":
  104. .. testcode::
  105. div_grid = div().set_parse_action(with_attribute(type="grid"))
  106. grid_expr = div_grid + SkipTo(div | div_end)("body")
  107. for grid_header in grid_expr.search_string(html):
  108. print(grid_header.body)
  109. prints:
  110. .. testoutput::
  111. 1 4 0 1 0
  112. Construct a match with any div tag having a type attribute,
  113. regardless of the value:
  114. .. testcode::
  115. div_any_type = div().set_parse_action(
  116. with_attribute(type=with_attribute.ANY_VALUE)
  117. )
  118. div_expr = div_any_type + SkipTo(div | div_end)("body")
  119. for div_header in div_expr.search_string(html):
  120. print(div_header.body)
  121. prints:
  122. .. testoutput::
  123. 1 4 0 1 0
  124. 1,3 2,3 1,1
  125. """
  126. attrs_list: list[tuple[str, str]] = []
  127. if args:
  128. attrs_list.extend(args)
  129. else:
  130. attrs_list.extend(attr_dict.items())
  131. def pa(s: str, l: int, tokens: ParseResults) -> None:
  132. for attrName, attrValue in attrs_list:
  133. if attrName not in tokens:
  134. raise ParseException(s, l, f"no matching attribute {attrName!r}")
  135. if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: # type: ignore [attr-defined]
  136. raise ParseException(
  137. s,
  138. l,
  139. f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
  140. )
  141. return pa
  142. with_attribute.ANY_VALUE = object() # type: ignore [attr-defined]
  143. "Value to use with :class:`with_attribute` parse action, to match any value, as long as the attribute is present"
  144. def with_class(classname: str, namespace: str = "") -> ParseAction:
  145. """
  146. Simplified version of :meth:`with_attribute` when
  147. matching on a div class - made difficult because ``class`` is
  148. a reserved word in Python.
  149. Using similar input data to the :meth:`with_attribute` examples:
  150. .. testcode::
  151. html = '''
  152. <div>
  153. Some text
  154. <div class="grid">1 4 0 1 0</div>
  155. <div class="graph">1,3 2,3 1,1</div>
  156. <div>this &lt;div&gt; has no class</div>
  157. </div>
  158. '''
  159. div,div_end = make_html_tags("div")
  160. Only match div tag having the "grid" class:
  161. .. testcode::
  162. div_grid = div().set_parse_action(with_class("grid"))
  163. grid_expr = div_grid + SkipTo(div | div_end)("body")
  164. for grid_header in grid_expr.search_string(html):
  165. print(grid_header.body)
  166. prints:
  167. .. testoutput::
  168. 1 4 0 1 0
  169. Construct a match with any div tag having a class attribute,
  170. regardless of the value:
  171. .. testcode::
  172. div_any_type = div().set_parse_action(
  173. with_class(withAttribute.ANY_VALUE)
  174. )
  175. div_expr = div_any_type + SkipTo(div | div_end)("body")
  176. for div_header in div_expr.search_string(html):
  177. print(div_header.body)
  178. prints:
  179. .. testoutput::
  180. 1 4 0 1 0
  181. 1,3 2,3 1,1
  182. """
  183. classattr = f"{namespace}:class" if namespace else "class"
  184. return with_attribute(**{classattr: classname})
# Compatibility synonyms
# Each pre-PEP 8 camelCase name below is bound to whatever
# ``replaced_by_pep8`` returns for the old name and its snake_case
# counterpart defined above.
# fmt: off
replaceWith = replaced_by_pep8("replaceWith", replace_with)
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
withClass = replaced_by_pep8("withClass", with_class)
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
# fmt: on