# fake.py
"""Fake User Agent retriever."""
import random
from collections.abc import Iterable
from typing import Any, Optional, Union

from fake_useragent.log import logger
from fake_useragent.utils import BrowserUserAgentData, load
  7. def _ensure_iterable(
  8. *, default: Iterable[str], **kwarg: Optional[Iterable[str]]
  9. ) -> list[str]:
  10. """Ensure the given value is an Iterable and convert it to a list.
  11. Args:
  12. default (Iterable[str]): Default iterable to use if value is `None`.
  13. **kwarg (Optional[Iterable[str]]): A single keyword argument containing the value to check
  14. and convert.
  15. Raises:
  16. ValueError: If more than one keyword argument is provided.
  17. TypeError: If the value is not None, not a str, and not iterable.
  18. Returns:
  19. list[str]: A list containing the items from the iterable.
  20. """
  21. if len(kwarg) != 1:
  22. raise ValueError(
  23. f"ensure_iterable expects exactly one keyword argument but got {len(kwarg)}."
  24. )
  25. param_name, value = next(iter(kwarg.items()))
  26. if value is None:
  27. return list(default)
  28. if isinstance(value, str):
  29. return [value]
  30. try:
  31. return list(value)
  32. except TypeError as te:
  33. raise TypeError(
  34. f"'{param_name}' must be an iterable of str, a single str, or None but got "
  35. f"{type(value).__name__}."
  36. ) from te
  37. def _ensure_float(value: Any) -> float:
  38. """Ensure the given value is a float.
  39. Args:
  40. value (Any): The value to check and convert.
  41. Raises:
  42. ValueError: If the value is not a float.
  43. Returns:
  44. float: The float value.
  45. """
  46. try:
  47. return float(value)
  48. except ValueError as ve:
  49. msg = f"Value must be convertible to float but got {value}."
  50. raise ValueError(msg) from ve
  51. def _is_magic_name(attribute_name: str) -> bool:
  52. """Judge whether the given attribute name is the name of a magic method(e.g. __iter__).
  53. Args:
  54. attribute_name (str): The attribute name to check.
  55. Returns:
  56. bool: Whether the given attribute name is magic.
  57. """
  58. magic_min_length = 2 * len("__") + 1
  59. return (
  60. len(attribute_name) >= magic_min_length
  61. and attribute_name.isascii()
  62. and attribute_name.startswith("__")
  63. and attribute_name.endswith("__")
  64. )
  65. class FakeUserAgent:
  66. """Fake User Agent retriever.
  67. Args:
  68. browsers (Optional[Iterable[str]], optional): If given, will only ever return user agents
  69. from these browsers. If None, set to:
  70. `["Google", "Chrome", "Firefox", "Edge", "Opera"," Safari", "Android", "Yandex Browser", "Samsung Internet", "Opera Mobile",
  71. "Mobile Safari", "Firefox Mobile", "Firefox iOS", "Chrome Mobile", "Chrome Mobile iOS", "Mobile Safari UI/WKWebView",
  72. "Edge Mobile", "DuckDuckGo Mobile", "MiuiBrowser", "Whale", "Twitter", "Facebook", "Amazon Silk"]`.
  73. Defaults to None.
  74. os (Optional[Iterable[str]], optional): If given, will only ever return user agents from
  75. these operating systems. If None, set to `["Windows", "Linux", "Ubuntu", "Chrome OS", "Mac OS X", "Android","iOS"]`. Defaults to
  76. None.
  77. min_version (float, optional): Will only ever return user agents with versions greater than
  78. this one. Defaults to 0.0.
  79. min_percentage (float, optional): Filter user agents based on usage.
  80. Defaults to 0.0.
  81. platforms (Optional[Iterable[str]], optional): If given, will only return the user-agents with
  82. the provided platform type. If None, set to `["desktop", "mobile", "tablet"]`. Defaults to None.
  83. fallback (str, optional): User agent to use if there are any issues retrieving a user agent.
  84. Defaults to `"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like
  85. Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"`.
  86. safe_attrs (Optional[Iterable[str]], optional): `FakeUserAgent` uses a custom `__getattr__`
  87. to facilitate retrieval of user agents by browser. If you need to prevent some
  88. attributes from being treated as browsers, pass them here. If None, all attributes will
  89. be treated as browsers. Defaults to ["shape"] to prevent unintended calls in IDEs like PyCharm.
  90. Raises:
  91. TypeError: If `fallback` isn't a `str` or `safe_attrs` contains non-`str` values.
  92. """
  93. def __init__(
  94. self,
  95. browsers: Optional[Iterable[str]] = None,
  96. os: Optional[Iterable[str]] = None,
  97. min_version: float = 0.0,
  98. min_percentage: float = 0.0,
  99. platforms: Optional[Iterable[str]] = None,
  100. fallback: str = (
  101. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
  102. "AppleWebKit/537.36 (KHTML, like Gecko) "
  103. "Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
  104. ),
  105. safe_attrs: Optional[Iterable[str]] = None,
  106. ):
  107. self.browsers = _ensure_iterable(
  108. browsers=browsers,
  109. default=[
  110. "Google",
  111. "Chrome",
  112. "Firefox",
  113. "Edge",
  114. "Opera",
  115. "Safari",
  116. "Android",
  117. "Yandex Browser",
  118. "Samsung Internet",
  119. "Opera Mobile",
  120. "Mobile Safari",
  121. "Firefox Mobile",
  122. "Firefox iOS",
  123. "Chrome Mobile",
  124. "Chrome Mobile iOS",
  125. "Mobile Safari UI/WKWebView",
  126. "Edge Mobile",
  127. "DuckDuckGo Mobile",
  128. "MiuiBrowser",
  129. "Whale",
  130. "Twitter",
  131. "Facebook",
  132. "Amazon Silk",
  133. ],
  134. )
  135. self.os = _ensure_iterable(
  136. os=os,
  137. default=[
  138. "Windows",
  139. "Linux",
  140. "Ubuntu",
  141. "Chrome OS",
  142. "Mac OS X",
  143. "Android",
  144. "iOS",
  145. ],
  146. )
  147. self.min_percentage = _ensure_float(min_percentage)
  148. self.min_version = _ensure_float(min_version)
  149. self.platforms = _ensure_iterable(
  150. platforms=platforms, default=["desktop", "mobile", "tablet"]
  151. )
  152. if not isinstance(fallback, str):
  153. msg = f"fallback must be a str but got {type(fallback).__name__}."
  154. raise TypeError(msg)
  155. self.fallback = fallback
  156. if safe_attrs is None:
  157. safe_attrs = ["shape"]
  158. safe_attrs = _ensure_iterable(safe_attrs=safe_attrs, default=set())
  159. str_safe_attrs = [isinstance(attr, str) for attr in safe_attrs]
  160. if not all(str_safe_attrs):
  161. bad_indices = [
  162. idx for idx, is_str in enumerate(str_safe_attrs) if not is_str
  163. ]
  164. msg = f"safe_attrs must be an iterable of str but indices {bad_indices} are not."
  165. raise TypeError(msg)
  166. self.safe_attrs = set(safe_attrs)
  167. # Next, load our local data file into memory (browsers.jsonl)
  168. self.data_browsers = load()
  169. def getBrowser(self, browsers: Union[str, list[str]]) -> BrowserUserAgentData:
  170. """Get a browser user agent based on the filters.
  171. Args:
  172. browsers (str): The browser name(s) to get. Special keyword "random" will return a random user-agent string.
  173. Returns:
  174. BrowserUserAgentData: The user agent with additional data.
  175. """
  176. try:
  177. if browsers == "random":
  178. # Filter the browser list based on the browsers array using lambda
  179. # And based on OS list
  180. # And percentage is bigger then min percentage
  181. # And convert the iterator back to a list
  182. filtered_browsers = self._filter_useragents()
  183. else:
  184. # Or when random isn't select, we filter the browsers array based on the 'request' using lamba
  185. # And based on OS list
  186. # And percentage is bigger then min percentage
  187. # And convert the iterator back to a list
  188. filtered_browsers = self._filter_useragents(browsers_to_filter=browsers)
  189. # Pick a random browser user-agent from the filtered browsers
  190. # And return the full dict
  191. return random.choice(filtered_browsers) # noqa: S311
  192. except (KeyError, IndexError):
  193. logger.warning(
  194. f"Error occurred during getting browser(s): {browsers}, "
  195. "but was suppressed with fallback.",
  196. )
  197. # Return fallback object
  198. return {
  199. "useragent": self.fallback,
  200. "percent": 100.0,
  201. "type": "desktop",
  202. "device_brand": None,
  203. "browser": "Edge",
  204. "browser_version": "122.0.0.0",
  205. "browser_version_major_minor": 122.0,
  206. "os": "win32",
  207. "os_version": "10",
  208. "platform": "Win32",
  209. }
  210. def _filter_useragents(
  211. self, browsers_to_filter: Optional[Union[str, list[str]]] = None
  212. ) -> list[BrowserUserAgentData]:
  213. """Filter the user agents based on filters set in the instance, and an optional browser name.
  214. User agents from the data file are filtered based on the attributes passed upon
  215. instantiation.
  216. Args:
  217. browsers_to_filter (Union[str, None], optional): A specific browser name you want results for in
  218. this particular call. If None, don't apply extra filters. Defaults to None.
  219. Returns:
  220. list[BrowserUserAgentData]: A filtered list of user agents.
  221. """
  222. # Filter based on browser, os, typem browser version and percentage (weight).
  223. filtered_useragents = list(
  224. filter(
  225. lambda x: x["browser"] in self.browsers
  226. and x["os"] in self.os
  227. and x["type"]
  228. in self.platforms # We check platform on type here (I know it's confusing)
  229. and x["browser_version_major_minor"] >= self.min_version
  230. and x["percent"] >= self.min_percentage,
  231. self.data_browsers,
  232. )
  233. )
  234. # Filter based on a specific browser name(s), if set.
  235. if browsers_to_filter:
  236. # Ensure browsers_to_filter is always a list.
  237. if isinstance(browsers_to_filter, str):
  238. browsers_to_filter = [browsers_to_filter]
  239. filtered_useragents = list(
  240. filter(
  241. lambda x: x["browser"] in browsers_to_filter, filtered_useragents
  242. )
  243. )
  244. return filtered_useragents
  245. def __getitem__(self, attr: str) -> Union[str, Any]:
  246. """Get a user agent by key lookup, as if it were a dictionary (i.e., `ua['random']`).
  247. Args:
  248. attr (str): Browser name to get.
  249. Returns:
  250. Union[str, Any]: The user agent string if not a `self.safe_attr`, otherwise the
  251. attribute value.
  252. """
  253. return self.__getattr__(attr)
  254. def __getattr__(self, attr: Union[str, list[str]]) -> Union[str, Any]:
  255. """Get a user agent string by attribute lookup.
  256. Args:
  257. attr (str): Browser name to get. Special keyword "random" will return a user agent from
  258. any browser allowed by the instance's `self.browsers` filter.
  259. Returns:
  260. Union[str, Any]: The user agent string if not a `self.safe_attr`, otherwise the
  261. attribute value.
  262. """
  263. if isinstance(attr, str):
  264. if _is_magic_name(attr) or attr in self.safe_attrs:
  265. return super(UserAgent, self).__getattribute__(attr)
  266. elif isinstance(attr, list):
  267. for a in attr:
  268. if a in self.safe_attrs:
  269. return super(UserAgent, self).__getattribute__(a)
  270. return self.getBrowser(attr)["useragent"]
  271. @property
  272. def chrome(self) -> str:
  273. """Get a random Chrome user agent."""
  274. return self.__getattr__(["Chrome", "Chrome Mobile", "Chrome Mobile iOS"])
  275. @property
  276. def googlechrome(self) -> str:
  277. """Get a random Chrome user agent."""
  278. return self.chrome
  279. @property
  280. def ff(self) -> str:
  281. """Get a random Firefox user agent."""
  282. return self.firefox
  283. @property
  284. def firefox(self) -> str:
  285. """Get a random Firefox user agent."""
  286. return self.__getattr__(["Firefox", "Firefox Mobile", "Firefox iOS"])
  287. @property
  288. def safari(self) -> str:
  289. """Get a random Safari user agent."""
  290. return self.__getattr__(["Safari", "Mobile Safari"])
  291. @property
  292. def opera(self) -> str:
  293. """Get a random Opera user agent."""
  294. return self.__getattr__(["Opera", "Opera Mobile"])
  295. @property
  296. def google(self) -> str:
  297. """Get a random Google user agent."""
  298. return self.__getattr__(["Google"])
  299. @property
  300. def edge(self) -> str:
  301. """Get a random Edge user agent."""
  302. return self.__getattr__(["Edge", "Edge Mobile"])
  303. @property
  304. def random(self) -> str:
  305. """Get a random user agent."""
  306. return self.__getattr__("random")
  307. @property
  308. def getChrome(self) -> BrowserUserAgentData:
  309. """Get a random Chrome user agent, with additional data."""
  310. return self.getBrowser(["Chrome", "Chrome Mobile", "Chrome Mobile iOS"])
  311. @property
  312. def getFirefox(self) -> BrowserUserAgentData:
  313. """Get a random Firefox user agent, with additional data."""
  314. return self.getBrowser("Firefox")
  315. @property
  316. def getSafari(self) -> BrowserUserAgentData:
  317. """Get a random Safari user agent, with additional data."""
  318. return self.getBrowser(["Safari", "Mobile Safari"])
  319. @property
  320. def getOpera(self) -> BrowserUserAgentData:
  321. """Get a random Safari user agent, with additional data."""
  322. return self.getBrowser(["Opera", "Opera Mobile"])
  323. @property
  324. def getGoogle(self) -> BrowserUserAgentData:
  325. """Get a random Google user agent, with additional data."""
  326. return self.getBrowser(["Google"])
  327. @property
  328. def getEdge(self) -> BrowserUserAgentData:
  329. """Get a random Edge user agent, with additional data."""
  330. return self.getBrowser(["Edge", "Edge Mobile"])
  331. @property
  332. def getRandom(self) -> BrowserUserAgentData:
  333. """Get a random user agent, with additional data."""
  334. return self.getBrowser("random")
  335. # common alias
  336. UserAgent = FakeUserAgent