css.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. """Integration code for CSS selectors using `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_ (pypi: ``soupsieve``).
  2. Acquire a `CSS` object through the `element.Tag.css` attribute of
  3. the starting point of your CSS selector, or (if you want to run a
  4. selector against the entire document) of the `BeautifulSoup` object
  5. itself.
  6. The main advantage of doing this instead of using ``soupsieve``
  7. functions is that you don't need to keep passing the `element.Tag` to be
  8. selected against, since the `CSS` object is permanently scoped to that
  9. `element.Tag`.
  10. """
  11. from __future__ import annotations
  12. from types import ModuleType
  13. from typing import (
  14. Any,
  15. cast,
  16. Iterable,
  17. Iterator,
  18. MutableSequence,
  19. Optional,
  20. TYPE_CHECKING,
  21. )
  22. import warnings
  23. from bs4._typing import _NamespaceMapping
  24. if TYPE_CHECKING:
  25. from soupsieve import SoupSieve
  26. from bs4 import element
  27. from bs4.element import ResultSet, Tag
  28. soupsieve: Optional[ModuleType]
  29. try:
  30. import soupsieve
  31. except ImportError:
  32. soupsieve = None
  33. warnings.warn(
  34. "The soupsieve package is not installed. CSS selectors cannot be used."
  35. )
  36. class CSS(object):
  37. """A proxy object against the ``soupsieve`` library, to simplify its
  38. CSS selector API.
  39. You don't need to instantiate this class yourself; instead, use
  40. `element.Tag.css`.
  41. :param tag: All CSS selectors run by this object will use this as
  42. their starting point.
  43. :param api: An optional drop-in replacement for the ``soupsieve`` module,
  44. intended for use in unit tests.
  45. """
  46. def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None):
  47. if api is None:
  48. api = soupsieve
  49. if api is None:
  50. raise NotImplementedError(
  51. "Cannot execute CSS selectors because the soupsieve package is not installed."
  52. )
  53. self.api = api
  54. self.tag = tag
  55. def escape(self, ident: str) -> str:
  56. """Escape a CSS identifier.
  57. This is a simple wrapper around `soupsieve.escape() <https://facelessuser.github.io/soupsieve/api/#soupsieveescape>`_. See the
  58. documentation for that function for more information.
  59. """
  60. if soupsieve is None:
  61. raise NotImplementedError(
  62. "Cannot escape CSS identifiers because the soupsieve package is not installed."
  63. )
  64. return cast(str, self.api.escape(ident))
  65. def _ns(
  66. self, ns: Optional[_NamespaceMapping], select: str
  67. ) -> Optional[_NamespaceMapping]:
  68. """Normalize a dictionary of namespaces."""
  69. if not isinstance(select, self.api.SoupSieve) and ns is None:
  70. # If the selector is a precompiled pattern, it already has
  71. # a namespace context compiled in, which cannot be
  72. # replaced.
  73. ns = self.tag._namespaces
  74. return ns
  75. def _rs(self, results: MutableSequence[Tag]) -> ResultSet[Tag]:
  76. """Normalize a list of results to a py:class:`ResultSet`.
  77. A py:class:`ResultSet` is more consistent with the rest of
  78. Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has
  79. a helpful error message if you try to treat a list of results
  80. as a single result (a common mistake).
  81. """
  82. # Import here to avoid circular import
  83. from bs4 import ResultSet
  84. return ResultSet(None, results)
  85. def compile(
  86. self,
  87. select: str,
  88. namespaces: Optional[_NamespaceMapping] = None,
  89. flags: int = 0,
  90. **kwargs: Any,
  91. ) -> SoupSieve:
  92. """Pre-compile a selector and return the compiled object.
  93. :param selector: A CSS selector.
  94. :param namespaces: A dictionary mapping namespace prefixes
  95. used in the CSS selector to namespace URIs. By default,
  96. Beautiful Soup will use the prefixes it encountered while
  97. parsing the document.
  98. :param flags: Flags to be passed into Soup Sieve's
  99. `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.
  100. :param kwargs: Keyword arguments to be passed into Soup Sieve's
  101. `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.
  102. :return: A precompiled selector object.
  103. :rtype: soupsieve.SoupSieve
  104. """
  105. return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs)
  106. def select_one(
  107. self,
  108. select: str,
  109. namespaces: Optional[_NamespaceMapping] = None,
  110. flags: int = 0,
  111. **kwargs: Any,
  112. ) -> element.Tag | None:
  113. """Perform a CSS selection operation on the current Tag and return the
  114. first result, if any.
  115. This uses the Soup Sieve library. For more information, see
  116. that library's documentation for the `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
  117. :param selector: A CSS selector.
  118. :param namespaces: A dictionary mapping namespace prefixes
  119. used in the CSS selector to namespace URIs. By default,
  120. Beautiful Soup will use the prefixes it encountered while
  121. parsing the document.
  122. :param flags: Flags to be passed into Soup Sieve's
  123. `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
  124. :param kwargs: Keyword arguments to be passed into Soup Sieve's
  125. `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
  126. """
  127. return self.api.select_one(
  128. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  129. )
  130. def select(
  131. self,
  132. select: str,
  133. namespaces: Optional[_NamespaceMapping] = None,
  134. limit: int = 0,
  135. flags: int = 0,
  136. **kwargs: Any,
  137. ) -> ResultSet[element.Tag]:
  138. """Perform a CSS selection operation on the current `element.Tag`.
  139. This uses the Soup Sieve library. For more information, see
  140. that library's documentation for the `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
  141. :param selector: A CSS selector.
  142. :param namespaces: A dictionary mapping namespace prefixes
  143. used in the CSS selector to namespace URIs. By default,
  144. Beautiful Soup will pass in the prefixes it encountered while
  145. parsing the document.
  146. :param limit: After finding this number of results, stop looking.
  147. :param flags: Flags to be passed into Soup Sieve's
  148. `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
  149. :param kwargs: Keyword arguments to be passed into Soup Sieve's
  150. `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
  151. """
  152. if limit is None:
  153. limit = 0
  154. return self._rs(
  155. self.api.select(
  156. select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
  157. )
  158. )
  159. def iselect(
  160. self,
  161. select: str,
  162. namespaces: Optional[_NamespaceMapping] = None,
  163. limit: int = 0,
  164. flags: int = 0,
  165. **kwargs: Any,
  166. ) -> Iterator[element.Tag]:
  167. """Perform a CSS selection operation on the current `element.Tag`.
  168. This uses the Soup Sieve library. For more information, see
  169. that library's documentation for the `soupsieve.iselect()
  170. <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_
  171. method. It is the same as select(), but it returns a generator
  172. instead of a list.
  173. :param selector: A string containing a CSS selector.
  174. :param namespaces: A dictionary mapping namespace prefixes
  175. used in the CSS selector to namespace URIs. By default,
  176. Beautiful Soup will pass in the prefixes it encountered while
  177. parsing the document.
  178. :param limit: After finding this number of results, stop looking.
  179. :param flags: Flags to be passed into Soup Sieve's
  180. `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.
  181. :param kwargs: Keyword arguments to be passed into Soup Sieve's
  182. `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.
  183. """
  184. return self.api.iselect(
  185. select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
  186. )
  187. def closest(
  188. self,
  189. select: str,
  190. namespaces: Optional[_NamespaceMapping] = None,
  191. flags: int = 0,
  192. **kwargs: Any,
  193. ) -> Optional[element.Tag]:
  194. """Find the `element.Tag` closest to this one that matches the given selector.
  195. This uses the Soup Sieve library. For more information, see
  196. that library's documentation for the `soupsieve.closest()
  197. <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_
  198. method.
  199. :param selector: A string containing a CSS selector.
  200. :param namespaces: A dictionary mapping namespace prefixes
  201. used in the CSS selector to namespace URIs. By default,
  202. Beautiful Soup will pass in the prefixes it encountered while
  203. parsing the document.
  204. :param flags: Flags to be passed into Soup Sieve's
  205. `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.
  206. :param kwargs: Keyword arguments to be passed into Soup Sieve's
  207. `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.
  208. """
  209. return self.api.closest(
  210. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  211. )
  212. def match(
  213. self,
  214. select: str,
  215. namespaces: Optional[_NamespaceMapping] = None,
  216. flags: int = 0,
  217. **kwargs: Any,
  218. ) -> bool:
  219. """Check whether or not this `element.Tag` matches the given CSS selector.
  220. This uses the Soup Sieve library. For more information, see
  221. that library's documentation for the `soupsieve.match()
  222. <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
  223. method.
  224. :param: a CSS selector.
  225. :param namespaces: A dictionary mapping namespace prefixes
  226. used in the CSS selector to namespace URIs. By default,
  227. Beautiful Soup will pass in the prefixes it encountered while
  228. parsing the document.
  229. :param flags: Flags to be passed into Soup Sieve's
  230. `soupsieve.match()
  231. <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
  232. method.
  233. :param kwargs: Keyword arguments to be passed into SoupSieve's
  234. `soupsieve.match()
  235. <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
  236. method.
  237. """
  238. return cast(
  239. bool,
  240. self.api.match(
  241. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  242. ),
  243. )
  244. def filter(
  245. self,
  246. select: str,
  247. namespaces: Optional[_NamespaceMapping] = None,
  248. flags: int = 0,
  249. **kwargs: Any,
  250. ) -> ResultSet[element.Tag]:
  251. """Filter this `element.Tag`'s direct children based on the given CSS selector.
  252. This uses the Soup Sieve library. It works the same way as
  253. passing a `element.Tag` into that library's `soupsieve.filter()
  254. <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
  255. method. For more information, see the documentation for
  256. `soupsieve.filter()
  257. <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_.
  258. :param namespaces: A dictionary mapping namespace prefixes
  259. used in the CSS selector to namespace URIs. By default,
  260. Beautiful Soup will pass in the prefixes it encountered while
  261. parsing the document.
  262. :param flags: Flags to be passed into Soup Sieve's
  263. `soupsieve.filter()
  264. <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
  265. method.
  266. :param kwargs: Keyword arguments to be passed into SoupSieve's
  267. `soupsieve.filter()
  268. <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
  269. method.
  270. """
  271. return self._rs(
  272. self.api.filter(
  273. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  274. )
  275. )