| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264 |
- # actions.py
- from __future__ import annotations
- from typing import Union, Callable, Any
- from .exceptions import ParseException
- from .util import col, replaced_by_pep8
- from .results import ParseResults
# Type alias covering the call signatures accepted for a parse action.
# The alternatives differ only in how many positional parameters the
# callable declares; going by the parse actions defined in this module,
# the widest form receives (input string, location, tokens).
ParseAction = Union[
    Callable[[], Any],  # takes no arguments
    Callable[[ParseResults], Any],  # takes the tokens only
    Callable[[int, ParseResults], Any],  # takes an int and the tokens
    Callable[[str, int, ParseResults], Any],  # takes a str, an int and the tokens
]
class OnlyOnce:
    """
    Guard around a parse action that lets it complete a single time.

    Once the wrapped action has returned, every further call raises
    :class:`ParseException` until :meth:`reset` is invoked.

    Note: parse action signature must include all 3 arguments.
    """

    def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None:
        # Function-scope import, kept from the original layout
        # (presumably to avoid a circular import with .core - confirm).
        from .core import _trim_arity

        self.called = False
        self.callable = _trim_arity(method_call)

    def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults:
        """
        Run the wrapped parse action, or raise if it has already run.

        The ``called`` flag is set only after the action returns, so an
        action that raises leaves the wrapper callable again.
        """
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """
        Re-arm the wrapper so the parse action may be called once more.
        """
        self.called = False
def match_only_at_col(n: int) -> ParseAction:
    """
    Build a parse action that accepts a match only when its location
    falls on column ``n`` of the input text.

    The returned action raises :class:`ParseException` when the column
    computed for the match location differs from ``n``; otherwise it
    returns ``None`` and leaves the tokens untouched.
    """

    def verify_col(strg: str, locn: int, toks: ParseResults) -> None:
        found_at = col(locn, strg)
        if found_at == n:
            return
        raise ParseException(strg, locn, f"matched token not at column {n}")

    return verify_col
def replace_with(repl_str: Any) -> ParseAction:
    """
    Build a parse action that ignores whatever was matched and returns
    a one-element list holding ``repl_str`` instead. Especially useful
    when used with :meth:`~ParserElement.transform_string`.

    Example:

    .. doctest::

        >>> num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        >>> term = na | num

        >>> term[1, ...].parse_string("324 234 N/A 234")
        ParseResults([324, 234, nan, 234], {})
    """

    def substitute(s, l, t):
        # All three arguments are accepted but unused; the replacement
        # value is fixed when replace_with() is called.
        return [repl_str]

    return substitute
def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
    r"""
    Parse action that strips the enclosing quotation marks from a parsed
    quoted string whose quote delimiter is a single character. For
    strings quoted with multi-character delimiters, use the
    :class:`QuotedString` class.

    Only the first token is examined; its first and last characters are
    dropped and the remainder is returned.

    Example:

    .. doctest::

        >>> # by default, quotation marks are included in parsed results
        >>> quoted_string.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(["'Now is the Winter of our Discontent'"], {})

        >>> # use remove_quotes to strip quotation marks from parsed results
        >>> dequoted = quoted_string().set_parse_action(remove_quotes)
        >>> dequoted.parse_string("'Now is the Winter of our Discontent'")
        ParseResults(['Now is the Winter of our Discontent'], {})
    """
    quoted = t[0]
    # Drop exactly one character from each end.
    return quoted[1:-1]
def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Name-value tuples and keyword arguments may be combined in a single
    call; every filter given, in either form, must be satisfied.

    For attribute names with a namespace prefix, you must use the second
    or third form, since such names are not valid Python keywords.
    Attribute names are matched insensitive to upper/lower case only to
    the extent that the tag expression records them that way: this parse
    action looks each name up in the parsed tokens exactly as given.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action returns ``None`` when all filters match and
    raises :class:`ParseException` when a required attribute is missing
    or has a different value.

    The next two examples use the following input data and tag parsers:

    .. testcode::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

    Only match div tag having a type attribute with value "grid":

    .. testcode::

        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0

    Construct a match with any div tag having a type attribute,
    regardless of the value:

    .. testcode::

        div_any_type = div().set_parse_action(
            with_attribute(type=with_attribute.ANY_VALUE)
        )
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Gather every requested filter. Positional tuples are checked first,
    # then keyword filters; previously keyword filters were silently
    # dropped whenever any positional tuple was supplied.
    attrs_list: list[tuple[str, str]] = [*args, *attr_dict.items()]

    def pa(s: str, l: int, tokens: ParseResults) -> None:
        for attr_name, attr_value in attrs_list:
            if attr_name not in tokens:
                raise ParseException(s, l, f"no matching attribute {attr_name!r}")
            # ANY_VALUE is a unique sentinel, so identity is the right test:
            # presence of the attribute is all that is required.
            if attr_value is with_attribute.ANY_VALUE:  # type: ignore [attr-defined]
                continue
            actual = tokens[attr_name]
            if actual != attr_value:
                raise ParseException(
                    s,
                    l,
                    f"attribute {attr_name!r} has value {actual!r}, must be {attr_value!r}",
                )

    return pa


with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
"Value to use with :class:`with_attribute` parse action, to match any value, as long as the attribute is present"
def with_class(classname: str, namespace: str = "") -> ParseAction:
    """
    Simplified version of :meth:`with_attribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The filter is applied to the attribute named ``class``, or
    ``<namespace>:class`` when a non-empty ``namespace`` is given, and the
    resulting parse action is the one built by :meth:`with_attribute`.
    Pass ``with_attribute.ANY_VALUE`` as ``classname`` to require only
    that the attribute be present.

    Using similar input data to the :meth:`with_attribute` examples:

    .. testcode::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this <div> has no class</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

    Only match div tag having the "grid" class:

    .. testcode::

        div_grid = div().set_parse_action(with_class("grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0

    Construct a match with any div tag having a class attribute,
    regardless of the value:

    .. testcode::

        div_any_type = div().set_parse_action(
            with_class(with_attribute.ANY_VALUE)
        )
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints:

    .. testoutput::

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # "class" (and any "ns:class") cannot be written as a Python keyword
    # argument, so the filter is passed through an unpacked dict.
    classattr = f"{namespace}:class" if namespace else "class"
    return with_attribute(**{classattr: classname})
# Compatibility synonyms: each camelCase name below is the result of
# passing that name and the matching snake_case function defined in this
# module to replaced_by_pep8, listed in definition order.
# fmt: off
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
replaceWith = replaced_by_pep8("replaceWith", replace_with)
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
withClass = replaced_by_pep8("withClass", with_class)
# fmt: on
|