util.py 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041
  1. from __future__ import annotations
  2. import colorsys
  3. import contextlib
  4. import dataclasses
  5. import enum
  6. import importlib
  7. import importlib.util
  8. import itertools
  9. import json
  10. import logging
  11. import math
  12. import numbers
  13. import os
  14. import pathlib
  15. import platform
  16. import queue
  17. import random
  18. import re
  19. import secrets
  20. import shlex
  21. import socket
  22. import string
  23. import sys
  24. import tarfile
  25. import tempfile
  26. import threading
  27. import time
  28. import types
  29. import urllib
  30. from collections.abc import Iterable, Mapping, Sequence
  31. from dataclasses import asdict, is_dataclass
  32. from datetime import date, datetime, timedelta
  33. from gzip import GzipFile
  34. from importlib import import_module
  35. from sys import getsizeof
  36. from types import ModuleType
  37. from typing import IO, TYPE_CHECKING, Callable, TextIO, Union
  38. from typing_extensions import Any, Generator, TypeGuard, TypeVar, deprecated
  39. import wandb
  40. import wandb.env
  41. from wandb.errors import (
  42. AuthenticationError,
  43. CommError,
  44. UsageError,
  45. WandbCoreNotAvailableError,
  46. )
  47. from wandb.errors.term import terminput
  48. from wandb.sdk.lib import filesystem, runid
  49. from wandb.sdk.lib.json_util import dump, dumps
  50. from wandb.sdk.lib.paths import FilePathStr, StrPath
  51. if TYPE_CHECKING:
  52. from requests import Response
  53. from wandb.sdk.artifacts.artifact import Artifact
  54. CheckRetryFnType = Callable[[Exception], Union[bool, timedelta]]
  55. T = TypeVar("T")
  56. logger = logging.getLogger(__name__)
  57. _not_importable = set()
  58. LAUNCH_JOB_ARTIFACT_SLOT_NAME = "_wandb_job"
  59. MAX_LINE_BYTES = (10 << 20) - (100 << 10) # imposed by back end
  60. IS_GIT = os.path.exists(os.path.join(os.path.dirname(__file__), "..", ".git"))
  61. # From https://docs.docker.com/engine/reference/commandline/tag/
  62. # "Name components may contain lowercase letters, digits and separators.
  63. # A separator is defined as a period, one or two underscores, or one or more dashes.
  64. # A name component may not start or end with a separator."
  65. DOCKER_IMAGE_NAME_SEPARATOR = "(?:__|[._]|[-]+)"
  66. RE_DOCKER_IMAGE_NAME_SEPARATOR_START = re.compile("^" + DOCKER_IMAGE_NAME_SEPARATOR)
  67. RE_DOCKER_IMAGE_NAME_SEPARATOR_END = re.compile(DOCKER_IMAGE_NAME_SEPARATOR + "$")
  68. RE_DOCKER_IMAGE_NAME_SEPARATOR_REPEAT = re.compile(DOCKER_IMAGE_NAME_SEPARATOR + "{2,}")
  69. RE_DOCKER_IMAGE_NAME_CHARS = re.compile(r"[^a-z0-9._\-]")
  70. POW_10_BYTES = [
  71. ("B", 10**0),
  72. ("KB", 10**3),
  73. ("MB", 10**6),
  74. ("GB", 10**9),
  75. ("TB", 10**12),
  76. ("PB", 10**15),
  77. ("EB", 10**18),
  78. ]
  79. POW_2_BYTES = [
  80. ("B", 2**0),
  81. ("KiB", 2**10),
  82. ("MiB", 2**20),
  83. ("GiB", 2**30),
  84. ("TiB", 2**40),
  85. ("PiB", 2**50),
  86. ("EiB", 2**60),
  87. ]
  88. def vendor_setup() -> Callable:
  89. """Create a function that restores user paths after vendor imports.
  90. This enables us to use the vendor directory for packages we don't depend on. Call
  91. the returned function after imports are complete. If you don't you may modify the
  92. user's path which is never good.
  93. Usage:
  94. ```python
  95. reset_path = vendor_setup()
  96. # do any vendor imports...
  97. reset_path()
  98. ```
  99. """
  100. original_path = [directory for directory in sys.path]
  101. def reset_import_path() -> None:
  102. sys.path = original_path
  103. parent_dir = os.path.abspath(os.path.dirname(__file__))
  104. vendor_dir = os.path.join(parent_dir, "vendor")
  105. vendor_packages = (
  106. "gql-0.2.0",
  107. "graphql-core-1.1",
  108. "watchdog_0_9_0",
  109. "promise-2.3.0",
  110. )
  111. package_dirs = [os.path.join(vendor_dir, p) for p in vendor_packages]
  112. for p in [vendor_dir] + package_dirs:
  113. if p not in sys.path:
  114. sys.path.insert(1, p)
  115. return reset_import_path
  116. def vendor_import(name: str) -> Any:
  117. reset_path = vendor_setup()
  118. module = import_module(name)
  119. reset_path()
  120. return module
  121. class LazyModuleState:
  122. def __init__(self, module: types.ModuleType) -> None:
  123. self.module = module
  124. self.load_started = False
  125. self.lock = threading.RLock()
  126. def load(self) -> None:
  127. with self.lock:
  128. if self.load_started:
  129. return
  130. self.load_started = True
  131. assert self.module.__spec__ is not None
  132. assert self.module.__spec__.loader is not None
  133. self.module.__spec__.loader.exec_module(self.module)
  134. self.module.__class__ = types.ModuleType
  135. # Set the submodule as an attribute on the parent module
  136. # This enables access to the submodule via normal attribute access.
  137. parent, _, child = self.module.__name__.rpartition(".")
  138. if parent:
  139. parent_module = sys.modules[parent]
  140. setattr(parent_module, child, self.module)
class LazyModule(types.ModuleType):
    """Module subclass that triggers the real import on first attribute use.

    Every attribute read, write, or delete first calls ``load()`` on the
    ``__lazy_module_state__`` object stored on the instance, then performs the
    requested operation through ``object``'s default implementation.
    """

    def __getattribute__(self, name: str) -> Any:
        # Fetch the state via object.__getattribute__ so this hook does not
        # call itself recursively.
        state = object.__getattribute__(self, "__lazy_module_state__")
        state.load()
        return object.__getattribute__(self, name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Load first, then store the attribute.
        state = object.__getattribute__(self, "__lazy_module_state__")
        state.load()
        object.__setattr__(self, name, value)

    def __delattr__(self, name: str) -> None:
        # Load first, then delete the attribute.
        state = object.__getattribute__(self, "__lazy_module_state__")
        state.load()
        object.__delattr__(self, name)
  154. def import_module_lazy(name: str) -> types.ModuleType:
  155. """Import a module lazily, only when it is used.
  156. Inspired by importlib.util.LazyLoader, but improved so that the module loading is
  157. thread-safe. Circular dependency between modules can lead to a deadlock if the two
  158. modules are loaded from different threads.
  159. :param (str) name: Dot-separated module path. E.g., 'scipy.stats'.
  160. """
  161. try:
  162. return sys.modules[name]
  163. except KeyError:
  164. spec = importlib.util.find_spec(name)
  165. if spec is None:
  166. raise ModuleNotFoundError
  167. module = importlib.util.module_from_spec(spec)
  168. module.__lazy_module_state__ = LazyModuleState(module) # type: ignore
  169. module.__class__ = LazyModule
  170. sys.modules[name] = module
  171. return module
  172. def get_module(
  173. name: str,
  174. required: str | None = None,
  175. lazy: bool = True,
  176. ) -> Any:
  177. """Return module or None. Absolute import is required.
  178. :param (str) name: Dot-separated module path. E.g., 'scipy.stats'.
  179. :param (str) required: A string to raise a ValueError if missing
  180. :param (bool) lazy: If True, return a lazy loader for the module.
  181. :return: (module|None) If import succeeds, the module will be returned.
  182. """
  183. if name not in _not_importable:
  184. try:
  185. if not lazy:
  186. return import_module(name)
  187. else:
  188. return import_module_lazy(name)
  189. except Exception:
  190. _not_importable.add(name)
  191. msg = f"Error importing optional module {name}"
  192. if required:
  193. logger.exception(msg)
  194. if required and name in _not_importable:
  195. raise wandb.Error(required)
  196. def get_optional_module(name) -> importlib.ModuleInterface | None: # type: ignore
  197. return get_module(name)
  198. np = get_module("numpy")
  199. pd_available = False
  200. pandas_spec = importlib.util.find_spec("pandas")
  201. if pandas_spec is not None:
  202. pd_available = True
  203. # TODO: Revisit these limits
  204. VALUE_BYTES_LIMIT = 100000
  205. @deprecated("Read the `app_url` setting from the appropriate Settings object.")
  206. def app_url(api_url: str) -> str:
  207. """Returns the URL for the W&B UI without a trailing slash."""
  208. if app_url := wandb.env.get_app_url():
  209. return str(app_url.strip("/"))
  210. return api_to_app_url(api_url)
  211. def api_to_app_url(api_url: str) -> str:
  212. """Convert the API URL to an app (UI) URL.
  213. Unlike the deprecated `app_url()`, this is a pure function: it does
  214. not consult environment variables.
  215. """
  216. if "://api.wandb.test" in api_url:
  217. # dev mode
  218. return api_url.replace("://api.", "://app.").strip("/")
  219. elif "://api.wandb." in api_url:
  220. # cloud
  221. return api_url.replace("://api.", "://").strip("/")
  222. elif "://api." in api_url:
  223. # onprem cloud
  224. return api_url.replace("://api.", "://app.").strip("/")
  225. # wandb/local
  226. return api_url
  227. def get_full_typename(o: Any) -> Any:
  228. """Determine types based on type names.
  229. Avoids needing to to import (and therefore depend on) PyTorch, TensorFlow, etc.
  230. """
  231. instance_name = o.__class__.__module__ + "." + o.__class__.__name__
  232. if instance_name in ["builtins.module", "__builtin__.module"]:
  233. return o.__name__
  234. else:
  235. return instance_name
  236. def get_h5_typename(o: Any) -> Any:
  237. typename = get_full_typename(o)
  238. if is_tf_tensor_typename(typename):
  239. return "tensorflow.Tensor"
  240. elif is_pytorch_tensor_typename(typename):
  241. return "torch.Tensor"
  242. else:
  243. return o.__class__.__module__.split(".")[0] + "." + o.__class__.__name__
  244. def is_uri(string: str) -> bool:
  245. parsed_uri = urllib.parse.urlparse(string)
  246. return len(parsed_uri.scheme) > 0
  247. def local_file_uri_to_path(uri: str) -> str:
  248. """Convert URI to local filesystem path.
  249. No-op if the uri does not have the expected scheme.
  250. """
  251. path = urllib.parse.urlparse(uri).path if uri.startswith("file:") else uri
  252. return urllib.request.url2pathname(path)
  253. def get_local_path_or_none(path_or_uri: str) -> str | None:
  254. """Return path if local, None otherwise.
  255. Return None if the argument is a local path (not a scheme or file:///). Otherwise
  256. return `path_or_uri`.
  257. """
  258. parsed_uri = urllib.parse.urlparse(path_or_uri)
  259. if (
  260. len(parsed_uri.scheme) == 0
  261. or parsed_uri.scheme == "file"
  262. and len(parsed_uri.netloc) == 0
  263. ):
  264. return local_file_uri_to_path(path_or_uri)
  265. else:
  266. return None
  267. def check_windows_valid_filename(path: int | str) -> bool:
  268. r"""Verify that the given path does not contain any invalid characters for a Windows filename.
  269. Windows filenames cannot contain the following characters:
  270. < > : " \ / | ? *
  271. For more details, refer to the official documentation:
  272. https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
  273. Args:
  274. path: The file path to check, which can be either an integer or a string.
  275. Returns:
  276. bool: True if the path does not contain any invalid characters, False otherwise.
  277. """
  278. return not bool(re.search(r'[<>:"\\?*]', path)) # type: ignore
  279. def make_file_path_upload_safe(path: str) -> str:
  280. r"""Makes the provide path safe for file upload.
  281. The filename is made safe by:
  282. 1. Removing any leading slashes to prevent writing to absolute paths
  283. 2. Replacing '.' and '..' with underscores to prevent directory traversal attacks
  284. Raises:
  285. ValueError: If running on Windows and the key contains invalid filename characters
  286. (\, :, *, ?, ", <, >, |)
  287. """
  288. sys_platform = platform.system()
  289. if sys_platform == "Windows" and not check_windows_valid_filename(path):
  290. raise ValueError(
  291. f"Path {path} is invalid. Please remove invalid filename characters"
  292. r' (\, :, *, ?, ", <, >, |)'
  293. )
  294. # On Windows, convert forward slashes to backslashes.
  295. # This ensures that the key is a valid filename on Windows.
  296. if sys_platform == "Windows":
  297. path = str(path).replace("/", os.sep)
  298. # Avoid writing to absolute paths by striping any leading slashes.
  299. # The key has already been validated for windows operating systems in util.check_windows_valid_filename
  300. # This ensures the key does not contain invalid characters for windows, such as '\' or ':'.
  301. # So we can check only for '/' in the key.
  302. path = path.lstrip(os.sep)
  303. # Avoid directory traversal by replacing dots with underscores.
  304. paths = path.split(os.sep)
  305. safe_paths = [
  306. p.replace(".", "_") if p in (os.curdir, os.pardir) else p for p in paths
  307. ]
  308. # Recombine the key into a relative path.
  309. return os.sep.join(safe_paths)
  310. def make_tarfile(
  311. output_filename: str,
  312. source_dir: str,
  313. archive_name: str,
  314. custom_filter: Callable | None = None,
  315. ) -> None:
  316. # Helper for filtering out modification timestamps
  317. def _filter_timestamps(tar_info: tarfile.TarInfo) -> tarfile.TarInfo | None:
  318. tar_info.mtime = 0
  319. return tar_info if custom_filter is None else custom_filter(tar_info)
  320. descriptor, unzipped_filename = tempfile.mkstemp()
  321. try:
  322. with tarfile.open(unzipped_filename, "w") as tar:
  323. tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
  324. # When gzipping the tar, don't include the tar's filename or modification time in the
  325. # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
  326. with (
  327. open(output_filename, "wb") as out_file,
  328. GzipFile(filename="", fileobj=out_file, mode="wb", mtime=0) as gzipped_tar,
  329. open(unzipped_filename, "rb") as tar_file,
  330. ):
  331. gzipped_tar.write(tar_file.read())
  332. finally:
  333. os.close(descriptor)
  334. os.remove(unzipped_filename)
  335. def is_tf_tensor(obj: Any) -> bool:
  336. import tensorflow # type: ignore
  337. return isinstance(obj, tensorflow.Tensor)
  338. def is_tf_tensor_typename(typename: str) -> bool:
  339. return typename.startswith("tensorflow.") and (
  340. "Tensor" in typename or "Variable" in typename
  341. )
  342. def is_tf_eager_tensor_typename(typename: str) -> bool:
  343. return typename.startswith("tensorflow.") and ("EagerTensor" in typename)
  344. def is_pytorch_tensor(obj: Any) -> bool:
  345. import torch # type: ignore
  346. return isinstance(obj, torch.Tensor)
  347. def is_pytorch_tensor_typename(typename: str) -> bool:
  348. return typename.startswith("torch.") and (
  349. "Tensor" in typename or "Variable" in typename
  350. )
  351. def is_jax_tensor_typename(typename: str) -> bool:
  352. return typename.startswith("jaxlib.") and "Array" in typename
  353. def get_jax_tensor(obj: Any) -> Any:
  354. import jax # type: ignore
  355. return jax.device_get(obj)
  356. def is_fastai_tensor_typename(typename: str) -> bool:
  357. return typename.startswith("fastai.") and ("Tensor" in typename)
  358. def is_pandas_data_frame_typename(typename: str) -> bool:
  359. return typename.startswith("pandas.") and "DataFrame" in typename
  360. def is_matplotlib_typename(typename: str) -> bool:
  361. return typename.startswith("matplotlib.")
  362. def is_plotly_typename(typename: str) -> bool:
  363. return typename.startswith("plotly.")
  364. def is_plotly_figure_typename(typename: str) -> bool:
  365. return typename.startswith("plotly.") and typename.endswith(".Figure")
  366. def is_numpy_array(obj: Any) -> bool:
  367. return np and isinstance(obj, np.ndarray)
  368. def is_pandas_data_frame(obj: Any) -> bool:
  369. if pd_available:
  370. import pandas as pd
  371. return isinstance(obj, pd.DataFrame)
  372. else:
  373. return is_pandas_data_frame_typename(get_full_typename(obj))
  374. def ensure_matplotlib_figure(obj: Any) -> Any:
  375. """Extract the current figure from a matplotlib object.
  376. Return the object itself if it's a figure.
  377. Raises ValueError if the object can't be converted.
  378. """
  379. import matplotlib # type: ignore
  380. from matplotlib.figure import Figure # type: ignore
  381. # there are combinations of plotly and matplotlib versions that don't work well together,
  382. # this patches matplotlib to add a removed method that plotly assumes exists
  383. from matplotlib.spines import Spine # type: ignore
  384. def is_frame_like(self: Any) -> bool:
  385. """Return True if directly on axes frame.
  386. This is useful for determining if a spine is the edge of an
  387. old style MPL plot. If so, this function will return True.
  388. """
  389. position = self._position or ("outward", 0.0)
  390. if isinstance(position, str):
  391. if position == "center":
  392. position = ("axes", 0.5)
  393. elif position == "zero":
  394. position = ("data", 0)
  395. if len(position) != 2:
  396. raise ValueError("position should be 2-tuple")
  397. position_type, amount = position # type: ignore
  398. return bool(position_type == "outward" and amount == 0)
  399. Spine.is_frame_like = is_frame_like
  400. if obj == matplotlib.pyplot:
  401. obj = obj.gcf()
  402. elif (not isinstance(obj, Figure)) and hasattr(obj, "figure"):
  403. obj = obj.figure
  404. # Some matplotlib objects have a figure function
  405. if not isinstance(obj, Figure):
  406. raise ValueError(
  407. "Only matplotlib.pyplot or matplotlib.pyplot.Figure objects are accepted."
  408. )
  409. return obj
  410. def matplotlib_to_plotly(obj: Any) -> Any:
  411. obj = ensure_matplotlib_figure(obj)
  412. tools = get_module(
  413. "plotly.tools",
  414. required=(
  415. "plotly is required to log interactive plots, install with: "
  416. "`pip install plotly` or convert the plot to an image with `wandb.Image(plt)`"
  417. ),
  418. )
  419. return tools.mpl_to_plotly(obj)
  420. def matplotlib_contains_images(obj: Any) -> bool:
  421. obj = ensure_matplotlib_figure(obj)
  422. return any(len(ax.images) > 0 for ax in obj.axes)
  423. def _numpy_generic_convert(obj: Any) -> Any:
  424. obj = obj.item()
  425. if isinstance(obj, float) and math.isnan(obj):
  426. obj = None
  427. elif isinstance(obj, np.generic) and (
  428. obj.dtype.kind == "f" or obj.dtype == "bfloat16"
  429. ):
  430. # obj is a numpy float with precision greater than that of native python float
  431. # (i.e., float96 or float128) or it is of custom type such as bfloat16.
  432. # in these cases, obj.item() does not return a native
  433. # python float (in the first case - to avoid loss of precision,
  434. # so we need to explicitly cast this down to a 64bit float)
  435. obj = float(obj)
  436. return obj
  437. def _sanitize_numpy_keys(
  438. d: dict,
  439. visited: dict[int, dict] | None = None,
  440. ) -> tuple[dict, bool]:
  441. """Returns a dictionary where all NumPy keys are converted.
  442. Args:
  443. d: The dictionary to sanitize.
  444. Returns:
  445. A sanitized dictionary, and a boolean indicating whether anything was
  446. changed.
  447. """
  448. out: dict[Any, Any] = dict()
  449. converted = False
  450. # Work with recursive dictionaries: if a dictionary has already been
  451. # converted, reuse its converted value to retain the recursive structure
  452. # of the input.
  453. if visited is None:
  454. visited = {id(d): out}
  455. elif id(d) in visited:
  456. return visited[id(d)], False
  457. visited[id(d)] = out
  458. for key, value in d.items():
  459. if isinstance(value, dict):
  460. value, converted_value = _sanitize_numpy_keys(value, visited)
  461. converted |= converted_value
  462. if isinstance(key, np.generic):
  463. key = _numpy_generic_convert(key)
  464. converted = True
  465. out[key] = value
  466. return out, converted
def json_friendly(  # noqa: C901
    obj: Any,
) -> tuple[Any, bool] | tuple[None | str | float, bool]:
    """Convert an object into something that's more becoming of JSON.

    The object is handled in two stages. First, tensors recognised by their
    type name (TensorFlow, PyTorch/fastai, JAX) are turned into numpy data.
    Second, the possibly replaced object is run through one chain of type
    checks and converted accordingly.

    Returns:
        A tuple ``(value, converted)``. ``converted`` is False only when no
        branch of the second stage matched (or, for dicts, when no key had to
        be changed); ``value`` is then the input object.
    """
    converted = True
    typename = get_full_typename(obj)

    # Stage 1: unwrap framework tensors, detected by name so that the
    # frameworks do not have to be imported here.
    if is_tf_eager_tensor_typename(typename):
        obj = obj.numpy()
    elif is_tf_tensor_typename(typename):
        try:
            obj = obj.eval()
        except RuntimeError:
            obj = obj.numpy()
    elif is_pytorch_tensor_typename(typename) or is_fastai_tensor_typename(typename):
        try:
            if obj.requires_grad:
                obj = obj.detach()
        except AttributeError:
            pass  # before 0.4 is only present on variables
        try:
            obj = obj.data
        except RuntimeError:
            pass  # happens for Tensors before 0.4
        if obj.size():
            obj = obj.cpu().detach().numpy()
        else:
            # An empty size() means there are no dimensions: return the
            # scalar directly.
            return obj.item(), True
    elif is_jax_tensor_typename(typename):
        obj = get_jax_tensor(obj)

    # Stage 2: convert by runtime type. This is a separate chain because the
    # object may have been replaced by a numpy array above.
    if is_numpy_array(obj):
        if obj.size == 1:
            obj = obj.flatten()[0]
        elif obj.size <= 32:
            obj = obj.tolist()
        # Arrays with more than 32 elements are returned unchanged, still
        # with converted=True.
    elif np and isinstance(obj, np.generic):
        obj = _numpy_generic_convert(obj)
    elif isinstance(obj, bytes):
        obj = obj.decode("utf-8")
    elif isinstance(obj, (datetime, date)):
        obj = obj.isoformat()
    elif callable(obj):
        # Prefer the dotted "module.qualname" of the callable; fall back to
        # str() when either attribute is missing.
        obj = (
            f"{obj.__module__}.{obj.__qualname__}"
            if hasattr(obj, "__qualname__") and hasattr(obj, "__module__")
            else str(obj)
        )
    elif isinstance(obj, float) and math.isnan(obj):
        obj = None
    elif isinstance(obj, dict) and np:
        obj, converted = _sanitize_numpy_keys(obj)
    elif isinstance(obj, set):
        # set is not json serializable, so we convert it to tuple
        obj = tuple(obj)
    elif isinstance(obj, enum.Enum):
        obj = obj.name
    else:
        converted = False

    # Warn about large values; getsizeof is applied to the converted object.
    if getsizeof(obj) > VALUE_BYTES_LIMIT:
        wandb.termwarn(
            f"Serializing object of type {type(obj).__name__} that is {getsizeof(obj)} bytes"
        )
    return obj, converted
  529. def json_friendly_val(val: Any) -> Any:
  530. """Make any value (including dict, slice, sequence, dataclass) JSON friendly."""
  531. converted: dict | list
  532. if isinstance(val, dict):
  533. converted = {}
  534. for key, value in val.items():
  535. converted[key] = json_friendly_val(value)
  536. return converted
  537. if isinstance(val, slice):
  538. converted = dict(
  539. slice_start=val.start, slice_step=val.step, slice_stop=val.stop
  540. )
  541. return converted
  542. val, _ = json_friendly(val)
  543. if isinstance(val, Sequence) and not isinstance(val, str):
  544. converted = []
  545. for value in val:
  546. converted.append(json_friendly_val(value))
  547. return converted
  548. if is_dataclass(val) and not isinstance(val, type):
  549. converted = asdict(val)
  550. return json_friendly_val(converted)
  551. else:
  552. if val.__class__.__module__ not in ("builtins", "__builtin__"):
  553. val = str(val)
  554. return val
  555. def alias_is_version_index(alias: str) -> bool:
  556. return len(alias) >= 2 and alias[0] == "v" and alias[1:].isnumeric()
  557. def convert_plots(obj: Any) -> Any:
  558. if is_matplotlib_typename(get_full_typename(obj)):
  559. tools = get_module(
  560. "plotly.tools",
  561. required=(
  562. "plotly is required to log interactive plots, install with: "
  563. "`pip install plotly` or convert the plot to an image with `wandb.Image(plt)`"
  564. ),
  565. )
  566. obj = tools.mpl_to_plotly(obj)
  567. if is_plotly_typename(get_full_typename(obj)):
  568. return {"_type": "plotly", "plot": obj.to_plotly_json()}
  569. else:
  570. return obj
  571. def maybe_compress_history(obj: Any) -> tuple[Any, bool]:
  572. if np and isinstance(obj, np.ndarray) and obj.size > 32:
  573. return wandb.Histogram(obj, num_bins=32).to_json(), True
  574. else:
  575. return obj, False
  576. def maybe_compress_summary(obj: Any, h5_typename: str) -> tuple[Any, bool]:
  577. if np and isinstance(obj, np.ndarray) and obj.size > 32:
  578. return (
  579. {
  580. "_type": h5_typename, # may not be ndarray
  581. "var": np.var(obj).item(),
  582. "mean": np.mean(obj).item(),
  583. "min": np.amin(obj).item(),
  584. "max": np.amax(obj).item(),
  585. "10%": np.percentile(obj, 10),
  586. "25%": np.percentile(obj, 25),
  587. "75%": np.percentile(obj, 75),
  588. "90%": np.percentile(obj, 90),
  589. "size": obj.size,
  590. },
  591. True,
  592. )
  593. else:
  594. return obj, False
  595. def launch_browser(attempt_launch_browser: bool = True) -> bool:
  596. """Decide if we should launch a browser."""
  597. _display_variables = ["DISPLAY", "WAYLAND_DISPLAY", "MIR_SOCKET"]
  598. _webbrowser_names_blocklist = ["www-browser", "lynx", "links", "elinks", "w3m"]
  599. import webbrowser
  600. launch_browser = attempt_launch_browser
  601. if launch_browser:
  602. if "linux" in sys.platform and not any(
  603. os.getenv(var) for var in _display_variables
  604. ):
  605. launch_browser = False
  606. try:
  607. browser = webbrowser.get()
  608. if hasattr(browser, "name") and browser.name in _webbrowser_names_blocklist:
  609. launch_browser = False
  610. except webbrowser.Error:
  611. launch_browser = False
  612. return launch_browser
  613. def generate_id(length: int = 8) -> str:
  614. # Do not use this; use wandb.sdk.lib.runid.generate_id instead.
  615. # This is kept only for legacy code.
  616. return runid.generate_id(length)
  617. def parse_tfjob_config() -> Any:
  618. """Attempt to parse TFJob config, returning False if it can't find it."""
  619. if os.getenv("TF_CONFIG"):
  620. try:
  621. return json.loads(os.environ["TF_CONFIG"])
  622. except ValueError:
  623. return False
  624. else:
  625. return False
  626. class WandBJSONEncoder(json.JSONEncoder):
  627. """A JSON Encoder that handles some extra types."""
  628. def default(self, obj: Any) -> Any:
  629. if hasattr(obj, "json_encode"):
  630. return obj.json_encode()
  631. # if hasattr(obj, 'to_json'):
  632. # return obj.to_json()
  633. tmp_obj, converted = json_friendly(obj)
  634. if converted:
  635. return tmp_obj
  636. return json.JSONEncoder.default(self, obj)
  637. class WandBJSONEncoderOld(json.JSONEncoder):
  638. """A JSON Encoder that handles some extra types."""
  639. def default(self, obj: Any) -> Any:
  640. tmp_obj, converted = json_friendly(obj)
  641. tmp_obj, compressed = maybe_compress_summary(tmp_obj, get_h5_typename(obj))
  642. if converted:
  643. return tmp_obj
  644. return json.JSONEncoder.default(self, tmp_obj)
  645. class WandBHistoryJSONEncoder(json.JSONEncoder):
  646. """A JSON Encoder that handles some extra types.
  647. This encoder turns numpy like objects with a size > 32 into histograms.
  648. """
  649. def default(self, obj: Any) -> Any:
  650. obj, converted = json_friendly(obj)
  651. obj, compressed = maybe_compress_history(obj)
  652. if converted:
  653. return obj
  654. return json.JSONEncoder.default(self, obj)
  655. class JSONEncoderUncompressed(json.JSONEncoder):
  656. """A JSON Encoder that handles some extra types.
  657. This encoder turns numpy like objects with a size > 32 into histograms.
  658. """
  659. def default(self, obj: Any) -> Any:
  660. if is_numpy_array(obj):
  661. return obj.tolist()
  662. elif np and isinstance(obj, np.number):
  663. return obj.item()
  664. elif np and isinstance(obj, np.generic):
  665. obj = obj.item()
  666. return json.JSONEncoder.default(self, obj)
  667. def json_dump_safer(obj: Any, fp: IO[str], **kwargs: Any) -> None:
  668. """Convert obj to json, with some extra encodable types."""
  669. return dump(obj, fp, cls=WandBJSONEncoder, **kwargs)
  670. def json_dumps_safer(obj: Any, **kwargs: Any) -> str:
  671. """Convert obj to json, with some extra encodable types."""
  672. return dumps(obj, cls=WandBJSONEncoder, **kwargs)
  673. # This is used for dumping raw json into files
  674. def json_dump_uncompressed(obj: Any, fp: IO[str], **kwargs: Any) -> None:
  675. """Convert obj to json, with some extra encodable types."""
  676. return dump(obj, fp, cls=JSONEncoderUncompressed, **kwargs)
  677. def json_dumps_safer_history(obj: Any, **kwargs: Any) -> str:
  678. """Convert obj to json, with some extra encodable types, including histograms."""
  679. return dumps(obj, cls=WandBHistoryJSONEncoder, **kwargs)
  680. def make_json_if_not_number(
  681. v: int | float | str | Mapping | Sequence,
  682. ) -> int | float | str:
  683. """If v is not a basic type convert it to json."""
  684. if isinstance(v, (float, int)):
  685. return v
  686. return json_dumps_safer(v)
  687. def make_safe_for_json(obj: Any) -> Any:
  688. """Replace invalid json floats with strings. Also converts to lists and dicts."""
  689. if isinstance(obj, Mapping):
  690. return {k: make_safe_for_json(v) for k, v in obj.items()}
  691. elif isinstance(obj, str):
  692. # str's are Sequence, so we need to short-circuit
  693. return obj
  694. elif isinstance(obj, Sequence):
  695. return [make_safe_for_json(v) for v in obj]
  696. elif isinstance(obj, float):
  697. # W&B backend and UI handle these strings
  698. if obj != obj: # standard way to check for NaN
  699. return "NaN"
  700. elif obj == float("+inf"):
  701. return "Infinity"
  702. elif obj == float("-inf"):
  703. return "-Infinity"
  704. return obj
  705. def no_retry_4xx(e: Exception) -> bool:
  706. from requests import HTTPError
  707. if not isinstance(e, HTTPError):
  708. return True
  709. assert e.response is not None
  710. if not (400 <= e.response.status_code < 500) or e.response.status_code == 429:
  711. return True
  712. body = json.loads(e.response.content)
  713. raise UsageError(body["errors"][0]["message"])
  714. def parse_backend_error_messages(response: Response) -> list[str]:
  715. """Returns error messages stored in a backend response.
  716. If the response is not in an expected format, an empty list is returned.
  717. Args:
  718. response: A response to an HTTP request to the W&B server.
  719. """
  720. from requests import JSONDecodeError
  721. try:
  722. data = response.json()
  723. except JSONDecodeError:
  724. return []
  725. if not isinstance(data, dict):
  726. return []
  727. # Backend error values are returned in one of two ways:
  728. # - A string containing the error message
  729. # - A JSON object with a "message" field that is a string
  730. def get_message(error: Any) -> str | None:
  731. if isinstance(error, str):
  732. return error
  733. elif (
  734. isinstance(error, dict)
  735. and (message := error.get("message"))
  736. and isinstance(message, str)
  737. ):
  738. return message
  739. else:
  740. return None
  741. # The response can contain an "error" field with a single error
  742. # or an "errors" field with a list of errors.
  743. if error := data.get("error"):
  744. message = get_message(error)
  745. return [message] if message else []
  746. elif (errors := data.get("errors")) and isinstance(errors, list):
  747. messages: list[str] = []
  748. for error in errors:
  749. message = get_message(error)
  750. if message:
  751. messages.append(message)
  752. return messages
  753. else:
  754. return []
  755. def no_retry_auth(e: Any) -> bool:
  756. from requests import HTTPError
  757. if hasattr(e, "exception"):
  758. e = e.exception
  759. if not isinstance(e, HTTPError):
  760. return True
  761. if e.response is None:
  762. return True
  763. # Don't retry bad request errors; raise immediately
  764. if e.response.status_code in (400, 409):
  765. return False
  766. # Retry all non-forbidden/unauthorized/not-found errors.
  767. if e.response.status_code not in (401, 403, 404):
  768. return True
  769. # Crash with more informational message on forbidden/unauthorized errors.
  770. # UnauthorizedError
  771. if e.response.status_code == 401:
  772. raise AuthenticationError(
  773. "The API key you provided is either invalid or missing. "
  774. f"If the `{wandb.env.API_KEY}` environment variable is set, make sure it is correct. "
  775. "Otherwise, to resolve this issue, you may try running the 'wandb login --relogin' command. "
  776. "If you are using a local server, make sure that you're using the correct hostname. "
  777. "If you're not sure, you can try logging in again using the 'wandb login --relogin --host [hostname]' command."
  778. f"(Error {e.response.status_code}: {e.response.reason})"
  779. )
  780. # ForbiddenError
  781. if e.response.status_code == 403:
  782. if wandb.run:
  783. raise CommError(f"Permission denied to access {wandb.run.path}")
  784. else:
  785. raise CommError(
  786. "It appears that you do not have permission to access the requested resource. "
  787. "Please reach out to the project owner to grant you access. "
  788. "If you have the correct permissions, verify that there are no issues with your networking setup."
  789. f"(Error {e.response.status_code}: {e.response.reason})"
  790. )
  791. # NotFoundError
  792. if e.response.status_code == 404:
  793. # If error message is empty, raise a more generic NotFoundError message.
  794. if parse_backend_error_messages(e.response):
  795. return False
  796. else:
  797. raise LookupError(
  798. f"Failed to find resource. Please make sure you have the correct resource path. "
  799. f"(Error {e.response.status_code}: {e.response.reason})"
  800. )
  801. return False
  802. def check_retry_conflict(e: Any) -> bool | None:
  803. """Check if the exception is a conflict type so it can be retried.
  804. Returns:
  805. True - Should retry this operation
  806. False - Should not retry this operation
  807. None - No decision, let someone else decide
  808. """
  809. from requests import HTTPError
  810. if hasattr(e, "exception"):
  811. e = e.exception
  812. if (
  813. isinstance(e, HTTPError)
  814. and e.response is not None
  815. and e.response.status_code == 409
  816. ):
  817. return True
  818. return None
  819. def check_retry_conflict_or_gone(e: Any) -> bool | None:
  820. """Check if the exception is a conflict or gone type, so it can be retried or not.
  821. Returns:
  822. True - Should retry this operation
  823. False - Should not retry this operation
  824. None - No decision, let someone else decide
  825. """
  826. from requests import HTTPError
  827. if hasattr(e, "exception"):
  828. e = e.exception
  829. if isinstance(e, HTTPError) and e.response is not None:
  830. if e.response.status_code == 409:
  831. return True
  832. if e.response.status_code == 410:
  833. return False
  834. return None
  835. def make_check_retry_fn(
  836. fallback_retry_fn: CheckRetryFnType,
  837. check_fn: Callable[[Exception], bool | None],
  838. check_timedelta: timedelta | None = None,
  839. ) -> CheckRetryFnType:
  840. """Return a check_retry_fn which can be used by lib.Retry().
  841. Args:
  842. fallback_fn: Use this function if check_fn didn't decide if a retry should happen.
  843. check_fn: Function which returns bool if retry should happen or None if unsure.
  844. check_timedelta: Optional retry timeout if we check_fn matches the exception
  845. """
  846. def check_retry_fn(e: Exception) -> bool | timedelta:
  847. check = check_fn(e)
  848. if check is None:
  849. return fallback_retry_fn(e)
  850. if check is False:
  851. return False
  852. if check_timedelta:
  853. return check_timedelta
  854. return True
  855. return check_retry_fn
  856. def find_runner(program: str) -> None | list | list[str]:
  857. """Return a command that will run program.
  858. Args:
  859. program: The string name of the program to try to run.
  860. Returns:
  861. commandline list of strings to run the program (eg. with subprocess.call()) or None
  862. """
  863. if os.path.isfile(program) and not os.access(program, os.X_OK):
  864. # program is a path to a non-executable file
  865. try:
  866. opened = open(program)
  867. except OSError: # PermissionError doesn't exist in 2.7
  868. return None
  869. first_line = opened.readline().strip()
  870. if first_line.startswith("#!"):
  871. return shlex.split(first_line[2:])
  872. if program.endswith(".py"):
  873. return [sys.executable]
  874. return None
  875. def downsample(values: Sequence, target_length: int) -> list:
  876. """Downsample 1d values to target_length, including start and end.
  877. Algorithm just rounds index down.
  878. Values can be any sequence, including a generator.
  879. """
  880. if not target_length > 1:
  881. raise UsageError("target_length must be > 1")
  882. values = list(values)
  883. if len(values) < target_length:
  884. return values
  885. ratio = float(len(values) - 1) / (target_length - 1)
  886. result = []
  887. for i in range(target_length):
  888. result.append(values[int(i * ratio)])
  889. return result
  890. def has_num(dictionary: Mapping, key: Any) -> bool:
  891. return key in dictionary and isinstance(dictionary[key], numbers.Number)
  892. def docker_image_regex(image: str) -> Any:
  893. """Regex match for valid docker image names."""
  894. if image:
  895. return re.match(
  896. r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?((?![._-])(?:[a-z0-9._-]*)(?<![._-])(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$",
  897. image,
  898. )
  899. return None
  900. def image_from_docker_args(args: list[str]) -> str | None:
  901. """Scan docker run args and attempt to find the most likely docker image argument.
  902. It excludes any arguments that start with a dash, and the argument after it if it
  903. isn't a boolean switch. This can be improved, we currently fallback gracefully when
  904. this fails.
  905. """
  906. bool_args = [
  907. "-t",
  908. "--tty",
  909. "--rm",
  910. "--privileged",
  911. "--oom-kill-disable",
  912. "--no-healthcheck",
  913. "-i",
  914. "--interactive",
  915. "--init",
  916. "--help",
  917. "--detach",
  918. "-d",
  919. "--sig-proxy",
  920. "-it",
  921. "-itd",
  922. ]
  923. last_flag = -2
  924. last_arg = ""
  925. possible_images = []
  926. if len(args) > 0 and args[0] == "run":
  927. args.pop(0)
  928. for i, arg in enumerate(args):
  929. if arg.startswith("-"):
  930. last_flag = i
  931. last_arg = arg
  932. elif "@sha256:" in arg:
  933. # Because our regex doesn't match digests
  934. possible_images.append(arg)
  935. elif docker_image_regex(arg):
  936. if last_flag == i - 2:
  937. possible_images.append(arg)
  938. elif "=" in last_arg:
  939. possible_images.append(arg)
  940. elif last_arg in bool_args and last_flag == i - 1:
  941. possible_images.append(arg)
  942. most_likely = None
  943. for img in possible_images:
  944. if ":" in img or "@" in img or "/" in img:
  945. most_likely = img
  946. break
  947. if most_likely is None and len(possible_images) > 0:
  948. most_likely = possible_images[0]
  949. return most_likely
  950. def load_yaml(file: Any) -> Any:
  951. import yaml
  952. return yaml.safe_load(file)
  953. def image_id_from_k8s() -> str | None:
  954. """Ping the k8s metadata service for the image id.
  955. Specify the KUBERNETES_NAMESPACE environment variable if your pods are not in the
  956. default namespace:
  957. - name: KUBERNETES_NAMESPACE valueFrom:
  958. fieldRef:
  959. fieldPath: metadata.namespace
  960. """
  961. token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
  962. if not os.path.exists(token_path):
  963. return None
  964. try:
  965. with open(token_path) as token_file:
  966. token = token_file.read()
  967. except FileNotFoundError:
  968. logger.warning(f"Token file not found at {token_path}.")
  969. return None
  970. except PermissionError as e:
  971. current_uid = os.getuid()
  972. warning = (
  973. f"Unable to read the token file at {token_path} due to permission error ({e})."
  974. f"The current user id is {current_uid}. "
  975. "Consider changing the securityContext to run the container as the current user."
  976. )
  977. logger.warning(warning)
  978. wandb.termwarn(warning)
  979. return None
  980. if not token:
  981. return None
  982. import requests
  983. k8s_server = "https://{}:{}/api/v1/namespaces/{}/pods/{}".format(
  984. os.getenv("KUBERNETES_SERVICE_HOST"),
  985. os.getenv("KUBERNETES_PORT_443_TCP_PORT"),
  986. os.getenv("KUBERNETES_NAMESPACE", "default"),
  987. os.getenv("HOSTNAME"),
  988. )
  989. try:
  990. res = requests.get(
  991. k8s_server,
  992. verify="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
  993. timeout=3,
  994. headers={"Authorization": f"Bearer {token}"},
  995. )
  996. res.raise_for_status()
  997. except requests.RequestException:
  998. return None
  999. try:
  1000. return str( # noqa: B005
  1001. res.json()["status"]["containerStatuses"][0]["imageID"]
  1002. ).strip("docker-pullable://")
  1003. except (ValueError, KeyError, IndexError):
  1004. logger.exception("Error checking kubernetes for image id")
  1005. return None
  1006. def async_call(target: Callable, timeout: int | float | None = None) -> Callable:
  1007. """Wrap a method to run in the background with an optional timeout.
  1008. Returns a new method that will call the original with any args, waiting for upto
  1009. timeout seconds. This new method blocks on the original and returns the result or
  1010. None if timeout was reached, along with the thread. You can check thread.is_alive()
  1011. to determine if a timeout was reached. If an exception is thrown in the thread, we
  1012. reraise it.
  1013. """
  1014. q: queue.Queue = queue.Queue()
  1015. def wrapped_target(q: queue.Queue, *args: Any, **kwargs: Any) -> Any:
  1016. try:
  1017. q.put(target(*args, **kwargs))
  1018. except Exception as e:
  1019. q.put(e)
  1020. def wrapper(
  1021. *args: Any, **kwargs: Any
  1022. ) -> tuple[Exception, threading.Thread] | tuple[None, threading.Thread]:
  1023. thread = threading.Thread(
  1024. target=wrapped_target, args=(q,) + args, kwargs=kwargs
  1025. )
  1026. thread.daemon = True
  1027. thread.start()
  1028. try:
  1029. result = q.get(True, timeout)
  1030. except queue.Empty:
  1031. return None, thread
  1032. if isinstance(result, Exception):
  1033. raise result.with_traceback(sys.exc_info()[2])
  1034. return result, thread
  1035. return wrapper
  1036. def read_many_from_queue(
  1037. q: queue.Queue, max_items: int, queue_timeout: int | float
  1038. ) -> list:
  1039. try:
  1040. item = q.get(True, queue_timeout)
  1041. except queue.Empty:
  1042. return []
  1043. items = [item]
  1044. for _ in range(max_items):
  1045. try:
  1046. item = q.get_nowait()
  1047. except queue.Empty:
  1048. return items
  1049. items.append(item)
  1050. return items
  1051. def stopwatch_now() -> float:
  1052. """Get a time value for interval comparisons.
  1053. When possible it is a monotonic clock to prevent backwards time issues.
  1054. """
  1055. return time.monotonic()
  1056. def class_colors(class_count: int) -> list[list[int]]:
  1057. # make class 0 black, and the rest equally spaced fully saturated hues
  1058. return [[0, 0, 0]] + [
  1059. colorsys.hsv_to_rgb(i / (class_count - 1.0), 1.0, 1.0) # type: ignore
  1060. for i in range(class_count - 1)
  1061. ]
  1062. def prompt_choices(
  1063. choices: Sequence[str],
  1064. input_timeout: float | None = None,
  1065. ) -> str:
  1066. """Prompt the user to choose from a list of options.
  1067. If exactly one choice is given, it is returned immediately.
  1068. Raises:
  1069. TimeoutError: if input_timeout is specified and expires.
  1070. NotATerminalError: if the output device is not capable.
  1071. KeyboardInterrupt: if the user aborts by pressing Ctrl+C.
  1072. """
  1073. if len(choices) == 1:
  1074. return choices[0]
  1075. for i, choice_str in enumerate(choices):
  1076. wandb.termlog(f"({i + 1}) {choice_str}")
  1077. while True:
  1078. choice = terminput("Enter your choice: ", timeout=input_timeout)
  1079. # If the user presses enter without typing anything, try again.
  1080. if not choice:
  1081. continue
  1082. idx = -1
  1083. with contextlib.suppress(ValueError):
  1084. idx = int(choice) - 1
  1085. if idx < 0 or idx > len(choices) - 1:
  1086. wandb.termwarn("Invalid choice")
  1087. continue
  1088. result = choices[idx]
  1089. wandb.termlog(f"You chose {result!r}")
  1090. return result
  1091. def guess_data_type(shape: Sequence[int], risky: bool = False) -> str | None:
  1092. """Infer the type of data based on the shape of the tensors.
  1093. Args:
  1094. shape (Sequence[int]): The shape of the data
  1095. risky(bool): some guesses are more likely to be wrong.
  1096. """
  1097. # (samples,) or (samples,logits)
  1098. if len(shape) in (1, 2):
  1099. return "label"
  1100. # Assume image mask like fashion mnist: (no color channel)
  1101. # This is risky because RNNs often have 3 dim tensors: batch, time, channels
  1102. if risky and len(shape) == 3:
  1103. return "image"
  1104. if len(shape) == 4:
  1105. if shape[-1] in (1, 3, 4):
  1106. # (samples, height, width, Y \ RGB \ RGBA)
  1107. return "image"
  1108. else:
  1109. # (samples, height, width, logits)
  1110. return "segmentation_mask"
  1111. return None
  1112. def download_file_from_url(
  1113. dest_path: str, source_url: str, api_key: str | None = None
  1114. ) -> None:
  1115. import requests
  1116. auth = ("api", api_key or "")
  1117. response = requests.get(
  1118. source_url,
  1119. auth=auth,
  1120. stream=True,
  1121. timeout=5,
  1122. )
  1123. response.raise_for_status()
  1124. if os.sep in dest_path:
  1125. filesystem.mkdir_exists_ok(os.path.dirname(dest_path))
  1126. with fsync_open(dest_path, "wb") as file:
  1127. for data in response.iter_content(chunk_size=1024):
  1128. file.write(data)
  1129. def download_file_into_memory(source_url: str, api_key: str | None = None) -> bytes:
  1130. import requests
  1131. auth = ("api", api_key or "")
  1132. response = requests.get(
  1133. source_url,
  1134. auth=auth,
  1135. stream=True,
  1136. timeout=5,
  1137. )
  1138. response.raise_for_status()
  1139. return response.content
  1140. def isatty(ob: IO) -> bool:
  1141. return hasattr(ob, "isatty") and ob.isatty()
  1142. def to_human_size(size: int, units: list[tuple[str, Any]] | None = None) -> str:
  1143. units = units or POW_10_BYTES
  1144. unit, value = units[0]
  1145. factor = round(float(size) / value, 1)
  1146. return (
  1147. f"{factor}{unit}"
  1148. if factor < 1024 or len(units) == 1
  1149. else to_human_size(size, units[1:])
  1150. )
  1151. def from_human_size(size: str, units: list[tuple[str, Any]] | None = None) -> int:
  1152. units = units or POW_10_BYTES
  1153. units_dict = {unit.upper(): value for (unit, value) in units}
  1154. regex = re.compile(
  1155. r"(\d+\.?\d*)\s*({})?".format("|".join(units_dict.keys())), re.IGNORECASE
  1156. )
  1157. match = re.match(regex, size)
  1158. if not match:
  1159. raise ValueError("size must be of the form `10`, `10B` or `10 B`.")
  1160. factor, unit = (
  1161. float(match.group(1)),
  1162. units_dict[match.group(2).upper()] if match.group(2) else 1,
  1163. )
  1164. return int(factor * unit)
  1165. def auto_project_name(program: str | None) -> str:
  1166. # if we're in git, set project name to git repo name + relative path within repo
  1167. from wandb.sdk.lib.gitlib import GitRepo
  1168. root_dir = GitRepo().root_dir
  1169. if root_dir is None:
  1170. return "uncategorized"
  1171. # On windows, GitRepo returns paths in unix style, but os.path is windows
  1172. # style. Coerce here.
  1173. root_dir = to_native_slash_path(root_dir)
  1174. repo_name = os.path.basename(root_dir)
  1175. if program is None:
  1176. return str(repo_name)
  1177. if not os.path.isabs(program):
  1178. program = os.path.join(os.curdir, program)
  1179. prog_dir = os.path.dirname(os.path.abspath(program))
  1180. if not prog_dir.startswith(root_dir):
  1181. return str(repo_name)
  1182. project = repo_name
  1183. sub_path = os.path.relpath(prog_dir, root_dir)
  1184. if sub_path != ".":
  1185. project += "-" + sub_path
  1186. return str(project.replace(os.sep, "_"))
  1187. # TODO(hugh): Deprecate version here and use wandb/sdk/lib/paths.py
  1188. def to_forward_slash_path(path: str) -> str:
  1189. if platform.system() == "Windows":
  1190. path = path.replace("\\", "/")
  1191. return path
  1192. # TODO(hugh): Deprecate version here and use wandb/sdk/lib/paths.py
  1193. def to_native_slash_path(path: str) -> FilePathStr:
  1194. return FilePathStr(path.replace("/", os.sep))
  1195. def check_and_warn_old(files: list[str]) -> bool:
  1196. if "wandb-metadata.json" in files:
  1197. wandb.termwarn("These runs were logged with a previous version of wandb.")
  1198. wandb.termwarn(
  1199. "Run pip install wandb<0.10.0 to get the old library and sync your runs."
  1200. )
  1201. return True
  1202. return False
  1203. class ImportMetaHook:
  1204. def __init__(self) -> None:
  1205. self.modules: dict[str, ModuleType] = dict()
  1206. self.on_import: dict[str, list] = dict()
  1207. def add(self, fullname: str, on_import: Callable) -> None:
  1208. self.on_import.setdefault(fullname, []).append(on_import)
  1209. def install(self) -> None:
  1210. sys.meta_path.insert(0, self) # type: ignore
  1211. def uninstall(self) -> None:
  1212. sys.meta_path.remove(self) # type: ignore
  1213. def find_module(
  1214. self, fullname: str, path: str | None = None
  1215. ) -> ImportMetaHook | None:
  1216. if fullname in self.on_import:
  1217. return self
  1218. return None
  1219. def load_module(self, fullname: str) -> ModuleType:
  1220. self.uninstall()
  1221. mod = importlib.import_module(fullname)
  1222. self.install()
  1223. self.modules[fullname] = mod
  1224. on_imports = self.on_import.get(fullname)
  1225. if on_imports:
  1226. for f in on_imports:
  1227. f()
  1228. return mod
  1229. def get_modules(self) -> tuple[str, ...]:
  1230. return tuple(self.modules)
  1231. def get_module(self, module: str) -> ModuleType:
  1232. return self.modules[module]
  1233. _import_hook: ImportMetaHook | None = None
  1234. def add_import_hook(fullname: str, on_import: Callable) -> None:
  1235. global _import_hook
  1236. if _import_hook is None:
  1237. _import_hook = ImportMetaHook()
  1238. _import_hook.install()
  1239. _import_hook.add(fullname, on_import)
  1240. def host_from_path(path: str | None) -> str:
  1241. """Return the host of the path."""
  1242. url = urllib.parse.urlparse(path)
  1243. return str(url.netloc)
  1244. def uri_from_path(path: str | None) -> str:
  1245. """Return the URI of the path."""
  1246. url = urllib.parse.urlparse(path)
  1247. uri = url.path if url.path[0] != "/" else url.path[1:]
  1248. return str(uri)
  1249. def is_unicode_safe(stream: TextIO) -> bool:
  1250. """Return True if the stream supports UTF-8."""
  1251. encoding = getattr(stream, "encoding", None)
  1252. return encoding.lower() in {"utf-8", "utf_8"} if encoding else False
  1253. def rand_alphanumeric(
  1254. length: int = 8, rand: ModuleType | random.Random | None = None
  1255. ) -> str:
  1256. wandb.termerror("rand_alphanumeric is deprecated, use 'secrets.token_hex'")
  1257. rand = rand or random
  1258. return "".join(rand.choice("0123456789ABCDEF") for _ in range(length))
  1259. @contextlib.contextmanager
  1260. def fsync_open(
  1261. path: StrPath, mode: str = "w", encoding: str | None = None
  1262. ) -> Generator[IO[Any], None, None]:
  1263. """Open a path for I/O and guarantee that the file is flushed and synced."""
  1264. with open(path, mode, encoding=encoding) as f:
  1265. yield f
  1266. f.flush()
  1267. os.fsync(f.fileno())
  1268. def _is_kaggle() -> bool:
  1269. return (
  1270. os.getenv("KAGGLE_KERNEL_RUN_TYPE") is not None
  1271. or "kaggle_environments" in sys.modules
  1272. )
  1273. def _has_internet() -> bool:
  1274. """Returns whether we have internet access.
  1275. Checks for internet access by attempting to open a DNS connection to
  1276. Google's root servers.
  1277. """
  1278. try:
  1279. s = socket.create_connection(("8.8.8.8", 53), 0.5)
  1280. s.close()
  1281. except OSError:
  1282. return False
  1283. return True
  1284. def _is_likely_kaggle() -> bool:
  1285. # Telemetry to mark first runs from Kagglers.
  1286. return (
  1287. _is_kaggle()
  1288. or os.path.exists(
  1289. os.path.expanduser(os.path.join("~", ".kaggle", "kaggle.json"))
  1290. )
  1291. or "kaggle" in sys.modules
  1292. )
  1293. def _is_databricks() -> bool:
  1294. # check if we are running inside a databricks notebook by
  1295. # inspecting sys.modules, searching for dbutils and verifying that
  1296. # it has the appropriate structure
  1297. if "dbutils" in sys.modules:
  1298. dbutils = sys.modules["dbutils"]
  1299. if hasattr(dbutils, "shell"):
  1300. shell = dbutils.shell
  1301. if hasattr(shell, "sc"):
  1302. sc = shell.sc
  1303. if hasattr(sc, "appName"):
  1304. return bool(sc.appName == "Databricks Shell")
  1305. return False
  1306. def _is_py_requirements_or_dockerfile(path: str) -> bool:
  1307. file = os.path.basename(path)
  1308. return (
  1309. file.endswith(".py")
  1310. or file.startswith("Dockerfile")
  1311. or file == "requirements.txt"
  1312. )
  1313. def artifact_to_json(artifact: Artifact) -> dict[str, Any]:
  1314. return {
  1315. "_type": "artifactVersion",
  1316. "_version": "v0",
  1317. "id": artifact.id,
  1318. "version": artifact.source_version,
  1319. "sequenceName": artifact.source_name.split(":")[0],
  1320. "usedAs": artifact.use_as,
  1321. }
  1322. def check_dict_contains_nested_artifact(d: dict, nested: bool = False) -> bool:
  1323. for item in d.values():
  1324. if isinstance(item, dict):
  1325. contains_artifacts = check_dict_contains_nested_artifact(item, True)
  1326. if contains_artifacts:
  1327. return True
  1328. elif (isinstance(item, wandb.Artifact) or _is_artifact_string(item)) and nested:
  1329. return True
  1330. return False
  1331. def load_json_yaml_dict(config: str) -> Any:
  1332. import yaml
  1333. ext = os.path.splitext(config)[-1]
  1334. if ext == ".json":
  1335. with open(config) as f:
  1336. return json.load(f)
  1337. elif ext == ".yaml":
  1338. with open(config) as f:
  1339. return yaml.safe_load(f)
  1340. else:
  1341. try:
  1342. return json.loads(config)
  1343. except ValueError:
  1344. return None
  1345. def _parse_entity_project_item(path: str) -> tuple:
  1346. """Parse paths with the following formats: {item}, {project}/{item}, & {entity}/{project}/{item}.
  1347. Args:
  1348. path: `str`, input path; must be between 0 and 3 in length.
  1349. Returns:
  1350. tuple of length 3 - (item, project, entity)
  1351. Example:
  1352. alias, project, entity = _parse_entity_project_item("myproj/mymodel:best")
  1353. assert entity == ""
  1354. assert project == "myproj"
  1355. assert alias == "mymodel:best"
  1356. """
  1357. words = path.split("/")
  1358. if len(words) > 3:
  1359. raise ValueError(
  1360. "Invalid path: must be str the form {item}, {project}/{item}, or {entity}/{project}/{item}"
  1361. )
  1362. padded_words = [""] * (3 - len(words)) + words
  1363. return tuple(reversed(padded_words))
  1364. def _resolve_aliases(aliases: str | Iterable[str] | None) -> list[str]:
  1365. """Add the 'latest' alias and ensure that all aliases are unique.
  1366. Takes in `aliases` which can be None, str, or List[str] and returns list[str].
  1367. Ensures that "latest" is always present in the returned list.
  1368. Args:
  1369. aliases: `aliases: str | Iterable[str] | None`
  1370. Returns:
  1371. list[str], with "latest" always present.
  1372. Usage:
  1373. ```python
  1374. aliases = _resolve_aliases(["best", "dev"])
  1375. assert aliases == ["best", "dev", "latest"]
  1376. aliases = _resolve_aliases("boom")
  1377. assert aliases == ["boom", "latest"]
  1378. ```
  1379. """
  1380. aliases = aliases or ["latest"]
  1381. if isinstance(aliases, str):
  1382. aliases = [aliases]
  1383. try:
  1384. return list(set(aliases) | {"latest"})
  1385. except TypeError as exc:
  1386. raise ValueError("`aliases` must be Iterable or None") from exc
  1387. def _is_artifact_object(v: Any) -> TypeGuard[wandb.Artifact]:
  1388. return isinstance(v, wandb.Artifact)
  1389. def _is_artifact_string(v: Any) -> TypeGuard[str]:
  1390. return isinstance(v, str) and v.startswith("wandb-artifact://")
  1391. def _is_artifact_version_weave_dict(v: Any) -> TypeGuard[dict]:
  1392. return isinstance(v, dict) and v.get("_type") == "artifactVersion"
  1393. def _is_artifact_representation(v: Any) -> bool:
  1394. return (
  1395. _is_artifact_object(v)
  1396. or _is_artifact_string(v)
  1397. or _is_artifact_version_weave_dict(v)
  1398. )
  1399. def parse_artifact_string(v: str) -> tuple[str, str | None, bool]:
  1400. if not v.startswith("wandb-artifact://"):
  1401. raise ValueError(f"Invalid artifact string: {v}")
  1402. parsed_v = v[len("wandb-artifact://") :]
  1403. base_uri = None
  1404. url_info = urllib.parse.urlparse(parsed_v)
  1405. if url_info.scheme != "":
  1406. base_uri = f"{url_info.scheme}://{url_info.netloc}"
  1407. parts = url_info.path.split("/")[1:]
  1408. else:
  1409. parts = parsed_v.split("/")
  1410. if parts[0] == "_id":
  1411. # for now can't fetch paths but this will be supported in the future
  1412. # when we allow passing typed media objects, this can be extended
  1413. # to include paths
  1414. return parts[1], base_uri, True
  1415. if len(parts) < 3:
  1416. raise ValueError(f"Invalid artifact string: {v}")
  1417. # for now can't fetch paths but this will be supported in the future
  1418. # when we allow passing typed media objects, this can be extended
  1419. # to include paths
  1420. entity, project, name_and_alias_or_version = parts[:3]
  1421. return f"{entity}/{project}/{name_and_alias_or_version}", base_uri, False
  1422. def _get_max_cli_version() -> str | None:
  1423. max_cli_version = wandb.api.max_cli_version()
  1424. return str(max_cli_version) if max_cli_version is not None else None
  1425. def ensure_text(
  1426. string: str | bytes, encoding: str = "utf-8", errors: str = "strict"
  1427. ) -> str:
  1428. """Coerce s to str."""
  1429. if isinstance(string, bytes):
  1430. return string.decode(encoding, errors)
  1431. elif isinstance(string, str):
  1432. return string
  1433. else:
  1434. raise TypeError(f"not expecting type {type(string)!r}")
  1435. def make_artifact_name_safe(name: str) -> str:
  1436. """Make an artifact name safe for use in artifacts."""
  1437. # artifact names may only contain alphanumeric characters, dashes, underscores, and dots.
  1438. cleaned = re.sub(r"[^a-zA-Z0-9_\-.]", "_", name)
  1439. if len(cleaned) <= 128:
  1440. return cleaned
  1441. # truncate with dots in the middle using regex
  1442. return re.sub(r"(^.{63}).*(.{63}$)", r"\g<1>..\g<2>", cleaned)
  1443. def make_docker_image_name_safe(name: str) -> str:
  1444. """Make a docker image name safe for use in artifacts."""
  1445. safe_chars = RE_DOCKER_IMAGE_NAME_CHARS.sub("__", name.lower())
  1446. deduped = RE_DOCKER_IMAGE_NAME_SEPARATOR_REPEAT.sub("__", safe_chars)
  1447. trimmed_start = RE_DOCKER_IMAGE_NAME_SEPARATOR_START.sub("", deduped)
  1448. trimmed = RE_DOCKER_IMAGE_NAME_SEPARATOR_END.sub("", trimmed_start)
  1449. return trimmed if trimmed else "image"
  1450. def merge_dicts(
  1451. source: dict[str, Any],
  1452. destination: dict[str, Any],
  1453. ) -> dict[str, Any]:
  1454. """Recursively merge two dictionaries.
  1455. This mutates the destination and its nested dictionaries and lists.
  1456. Instances of `dict` are recursively merged and instances of `list`
  1457. are appended to the destination. If the destination type is not
  1458. `dict` or `list`, respectively, the key is overwritten with the
  1459. source value.
  1460. For all other types, the source value overwrites the destination value.
  1461. """
  1462. for key, value in source.items():
  1463. if isinstance(value, dict):
  1464. node = destination.get(key)
  1465. if isinstance(node, dict):
  1466. merge_dicts(value, node)
  1467. else:
  1468. destination[key] = value
  1469. elif isinstance(value, list):
  1470. dest_value = destination.get(key)
  1471. if isinstance(dest_value, list):
  1472. dest_value.extend(value)
  1473. else:
  1474. destination[key] = value
  1475. else:
  1476. destination[key] = value
  1477. return destination
  1478. def coalesce(*arg: Any) -> Any:
  1479. """Return the first non-none value in the list of arguments.
  1480. Similar to ?? in C#.
  1481. """
  1482. return next((a for a in arg if a is not None), None)
  1483. def recursive_cast_dictlike_to_dict(d: dict[str, Any]) -> dict[str, Any]:
  1484. for k, v in d.items():
  1485. if isinstance(v, dict):
  1486. recursive_cast_dictlike_to_dict(v)
  1487. elif hasattr(v, "keys"):
  1488. d[k] = dict(v)
  1489. recursive_cast_dictlike_to_dict(d[k])
  1490. return d
  1491. def remove_keys_with_none_values(d: dict[str, Any] | Any) -> dict[str, Any] | Any:
  1492. # otherwise iterrows will create a bunch of ugly charts
  1493. if not isinstance(d, dict):
  1494. return d
  1495. if isinstance(d, dict):
  1496. new_dict = {}
  1497. for k, v in d.items():
  1498. new_v = remove_keys_with_none_values(v)
  1499. if new_v is not None and not (isinstance(new_v, dict) and len(new_v) == 0):
  1500. new_dict[k] = new_v
  1501. return new_dict if new_dict else None
  1502. def batched(n: int, iterable: Iterable[T]) -> Generator[list[T], None, None]:
  1503. i = iter(iterable)
  1504. batch = list(itertools.islice(i, n))
  1505. while batch:
  1506. yield batch
  1507. batch = list(itertools.islice(i, n))
  1508. def random_string(length: int = 12) -> str:
  1509. """Generate a random string of a given length.
  1510. :param length: Length of the string to generate.
  1511. :return: Random string.
  1512. """
  1513. return "".join(
  1514. secrets.choice(string.ascii_lowercase + string.digits) for _ in range(length)
  1515. )
  1516. def sample_with_exponential_decay_weights(
  1517. xs: Iterable | Iterable[Iterable],
  1518. ys: Iterable[Iterable],
  1519. keys: Iterable | None = None,
  1520. sample_size: int = 1500,
  1521. ) -> tuple[list, list, list | None]:
  1522. """Sample from a list of lists with weights that decay exponentially.
  1523. May be used with the wandb.plot.line_series function.
  1524. """
  1525. xs_array = np.array(xs)
  1526. ys_array = np.array(ys)
  1527. keys_array = np.array(keys) if keys else None
  1528. weights = np.exp(-np.arange(len(xs_array)) / len(xs_array))
  1529. weights /= np.sum(weights)
  1530. sampled_indices = np.random.choice(len(xs_array), size=sample_size, p=weights)
  1531. sampled_xs = xs_array[sampled_indices].tolist()
  1532. sampled_ys = ys_array[sampled_indices].tolist()
  1533. sampled_keys = keys_array[sampled_indices].tolist() if keys_array else None
  1534. return sampled_xs, sampled_ys, sampled_keys
  1535. @dataclasses.dataclass(frozen=True)
  1536. class InstalledDistribution:
  1537. """An installed distribution.
  1538. Attributes:
  1539. key: The distribution name as it would be imported.
  1540. version: The distribution's version string.
  1541. """
  1542. key: str
  1543. version: str
  1544. def working_set() -> Iterable[InstalledDistribution]:
  1545. """Return the working set of installed distributions."""
  1546. from importlib.metadata import distributions
  1547. for d in distributions():
  1548. with contextlib.suppress(KeyError, UnicodeDecodeError, TypeError):
  1549. # In some distributions, the "Name" attribute may not be present,
  1550. # or the metadata itself may be None or malformed, which can raise
  1551. # KeyError, UnicodeDecodeError, or TypeError.
  1552. # For additional context, see: https://github.com/python/importlib_metadata/issues/371.
  1553. yield InstalledDistribution(key=d.metadata["Name"], version=d.version)
  1554. def get_core_path() -> str:
  1555. """Returns the path to the wandb-core binary.
  1556. Returns:
  1557. str: The path to the wandb-core package.
  1558. Raises:
  1559. WandbCoreNotAvailableError: If wandb-core was not built for the current system.
  1560. """
  1561. bin_path = pathlib.Path(__file__).parent / "bin" / "wandb-core"
  1562. if not bin_path.exists():
  1563. raise WandbCoreNotAvailableError(
  1564. f"File not found: {bin_path}."
  1565. " Please contact support at support@wandb.com."
  1566. f" Your platform is: {platform.platform()}."
  1567. )
  1568. return str(bin_path)
  1569. def time_string_to_seconds(time_str: str) -> int:
  1570. """Parse a time period string and return seconds.
  1571. Args:
  1572. time_str: Time period string like "10s", "5m", "8h", "8d", "6M", "1y"
  1573. Accepted values are:
  1574. - s (seconds)
  1575. - m (minutes)
  1576. - h (hours)
  1577. - d (days)
  1578. - M (months)
  1579. - y (years)
  1580. Returns:
  1581. Number of seconds in the time period
  1582. Raises:
  1583. ValueError: If the format is invalid
  1584. Examples:
  1585. >>> parse_time_period("10s")
  1586. 10
  1587. >>> parse_time_period("5m")
  1588. 300
  1589. >>> parse_time_period("8d")
  1590. 691200
  1591. >>> parse_time_period("6M")
  1592. 15552000
  1593. >>> parse_time_period("1y")
  1594. 31536000
  1595. """
  1596. import re
  1597. if not time_str:
  1598. return 0
  1599. # Extract number and unit
  1600. match = re.match(r"^(\d+)([smhdMy])$", time_str)
  1601. if not match:
  1602. raise ValueError(
  1603. f"Invalid time period format: {time_str}. "
  1604. "Expected format: <number><unit> where unit is s (seconds), "
  1605. "m (minutes), h (hours), d (days), M (months), or y (years)"
  1606. )
  1607. amount = int(match.group(1))
  1608. unit = match.group(2)
  1609. # Convert to seconds
  1610. conversions = {
  1611. "s": 1, # seconds
  1612. "m": 60, # minutes
  1613. "h": 3600, # hours
  1614. "d": 86400, # days
  1615. "M": 2592000, # months (30 days)
  1616. "y": 31536000, # years (365 days)
  1617. }
  1618. return amount * conversions[unit]