env.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
"""All of W&B's environment variables.

Getters and setters for all of them should go here. That way it'll be easier to
avoid typos with names and be consistent about environment variables' semantics.

Environment variables are not the authoritative source for these values in many
cases.
"""
  7. from __future__ import annotations
  8. import json
  9. import os
  10. import sys
  11. from collections.abc import MutableMapping
  12. from pathlib import Path
  13. import platformdirs
# Names of the environment variables used by this module. Each constant holds
# the variable's *name* (the key to look up in the environment), not its value.
CONFIG_PATHS = "WANDB_CONFIG_PATHS"
SWEEP_PARAM_PATH = "WANDB_SWEEP_PARAM_PATH"
SHOW_RUN = "WANDB_SHOW_RUN"
DEBUG = "WANDB_DEBUG"
SILENT = "WANDB_SILENT"
QUIET = "WANDB_QUIET"
INITED = "WANDB_INITED"
DIR = "WANDB_DIR"
# Deprecate DESCRIPTION in a future release
DESCRIPTION = "WANDB_DESCRIPTION"
NAME = "WANDB_NAME"
NOTEBOOK_NAME = "WANDB_NOTEBOOK_NAME"
NOTES = "WANDB_NOTES"
USERNAME = "WANDB_USERNAME"
USER_EMAIL = "WANDB_USER_EMAIL"
PROJECT = "WANDB_PROJECT"
ENTITY = "WANDB_ENTITY"
ORGANIZATION = "WANDB_ORGANIZATION"
BASE_URL = "WANDB_BASE_URL"
APP_URL = "WANDB_APP_URL"
PROGRAM = "WANDB_PROGRAM"
ARGS = "WANDB_ARGS"
MODE = "WANDB_MODE"
START_METHOD = "WANDB_START_METHOD"
RESUME = "WANDB_RESUME"
RUN_ID = "WANDB_RUN_ID"
RUN_STORAGE_ID = "WANDB_RUN_STORAGE_ID"
RUN_GROUP = "WANDB_RUN_GROUP"
RUN_DIR = "WANDB_RUN_DIR"
SWEEP_ID = "WANDB_SWEEP_ID"
HTTP_TIMEOUT = "WANDB_HTTP_TIMEOUT"
FILE_PUSHER_TIMEOUT = "WANDB_FILE_PUSHER_TIMEOUT"
API_KEY = "WANDB_API_KEY"
IDENTITY_TOKEN_FILE = "WANDB_IDENTITY_TOKEN_FILE"
CREDENTIALS_FILE = "WANDB_CREDENTIALS_FILE"
JOB_TYPE = "WANDB_JOB_TYPE"
DISABLE_CODE = "WANDB_DISABLE_CODE"
DISABLE_GIT = "WANDB_DISABLE_GIT"
GIT_ROOT = "WANDB_GIT_ROOT"
SAVE_CODE = "WANDB_SAVE_CODE"
TAGS = "WANDB_TAGS"
# Note: the constant is IGNORE but the variable is named WANDB_IGNORE_GLOBS.
IGNORE = "WANDB_IGNORE_GLOBS"
ERROR_REPORTING = "WANDB_ERROR_REPORTING"
CORE_DEBUG = "WANDB_CORE_DEBUG"
DOCKER = "WANDB_DOCKER"
AGENT_REPORT_INTERVAL = "WANDB_AGENT_REPORT_INTERVAL"
AGENT_KILL_DELAY = "WANDB_AGENT_KILL_DELAY"
AGENT_DISABLE_FLAPPING = "WANDB_AGENT_DISABLE_FLAPPING"
AGENT_MAX_INITIAL_FAILURES = "WANDB_AGENT_MAX_INITIAL_FAILURES"
CRASH_NOSYNC_TIME = "WANDB_CRASH_NOSYNC_TIME"
MAGIC = "WANDB_MAGIC"
HOST = "WANDB_HOST"
ANONYMOUS = "WANDB_ANONYMOUS"
JUPYTER = "WANDB_JUPYTER"
CONFIG_DIR = "WANDB_CONFIG_DIR"
DATA_DIR = "WANDB_DATA_DIR"
ARTIFACT_DIR = "WANDB_ARTIFACT_DIR"
ARTIFACT_FETCH_FILE_URL_BATCH_SIZE = "WANDB_ARTIFACT_FETCH_FILE_URL_BATCH_SIZE"
CACHE_DIR = "WANDB_CACHE_DIR"
# Note: the constant is DISABLE_SSL but the variable is WANDB_INSECURE_DISABLE_SSL.
DISABLE_SSL = "WANDB_INSECURE_DISABLE_SSL"
SERVICE = "WANDB_SERVICE"
SENTRY_DSN = "WANDB_SENTRY_DSN"
INIT_TIMEOUT = "WANDB_INIT_TIMEOUT"
GIT_COMMIT = "WANDB_GIT_COMMIT"
GIT_REMOTE_URL = "WANDB_GIT_REMOTE_URL"
# Underscore-prefixed (non-public) constant; the variable is WANDB_X_EXECUTABLE.
_EXECUTABLE = "WANDB_X_EXECUTABLE"
LAUNCH_QUEUE_NAME = "WANDB_LAUNCH_QUEUE_NAME"
LAUNCH_QUEUE_ENTITY = "WANDB_LAUNCH_QUEUE_ENTITY"
LAUNCH_TRACE_ID = "WANDB_LAUNCH_TRACE_ID"
ENABLE_DCGM_PROFILING = "WANDB_ENABLE_DCGM_PROFILING"
DISABLE_ORJSON = "WANDB_DISABLE_ORJSON"
# For testing, to be removed in future version
USE_V1_ARTIFACTS = "_WANDB_USE_V1_ARTIFACTS"
  87. def immutable_keys() -> list[str]:
  88. """These are env keys that shouldn't change within a single process.
  89. We use this to maintain certain values between multiple calls to wandb.init within a single process.
  90. """
  91. return [
  92. DIR,
  93. ENTITY,
  94. PROJECT,
  95. API_KEY,
  96. IGNORE,
  97. DISABLE_CODE,
  98. DISABLE_GIT,
  99. DOCKER,
  100. MODE,
  101. BASE_URL,
  102. ERROR_REPORTING,
  103. CRASH_NOSYNC_TIME,
  104. MAGIC,
  105. USERNAME,
  106. USER_EMAIL,
  107. DIR,
  108. SILENT,
  109. CONFIG_PATHS,
  110. ANONYMOUS,
  111. RUN_GROUP,
  112. JOB_TYPE,
  113. TAGS,
  114. RESUME,
  115. AGENT_REPORT_INTERVAL,
  116. HTTP_TIMEOUT,
  117. HOST,
  118. DATA_DIR,
  119. ARTIFACT_DIR,
  120. ARTIFACT_FETCH_FILE_URL_BATCH_SIZE,
  121. CACHE_DIR,
  122. USE_V1_ARTIFACTS,
  123. DISABLE_SSL,
  124. IDENTITY_TOKEN_FILE,
  125. CREDENTIALS_FILE,
  126. ]
  127. def _env_as_bool(
  128. var: str, default: str | None = None, env: MutableMapping | None = None
  129. ) -> bool:
  130. if env is None:
  131. env = os.environ
  132. val = env.get(var, default)
  133. if not isinstance(val, str):
  134. return False
  135. try:
  136. return strtobool(val)
  137. except ValueError:
  138. return False
  139. def is_debug(default: str | None = None, env: MutableMapping | None = None) -> bool:
  140. return _env_as_bool(DEBUG, default=default, env=env)
  141. def is_offline(env: MutableMapping | None = None) -> bool:
  142. if env is None:
  143. env = os.environ
  144. return env.get(MODE) == "offline"
  145. def is_quiet() -> bool:
  146. return _env_as_bool(QUIET, default="false")
  147. def is_silent() -> bool:
  148. return _env_as_bool(SILENT, default="false")
  149. def error_reporting_enabled() -> bool:
  150. return _env_as_bool(ERROR_REPORTING, default="True")
  151. def core_debug(default: str | None = None) -> bool:
  152. return _env_as_bool(CORE_DEBUG, default=default) or is_debug()
  153. def ssl_disabled() -> bool:
  154. return _env_as_bool(DISABLE_SSL, default="False")
  155. def dcgm_profiling_enabled() -> bool:
  156. """Checks whether collecting profiling metrics for Nvidia GPUs using DCGM is requested.
  157. Note: Enabling this feature can lead to increased resource usage
  158. compared to standard monitoring.
  159. Requires the `nvidia-dcgm` service to be running on the machine.
  160. """
  161. return _env_as_bool(ENABLE_DCGM_PROFILING, default="False")
  162. def get_error_reporting(
  163. default: bool | str = True,
  164. env: MutableMapping | None = None,
  165. ) -> bool | str:
  166. if env is None:
  167. env = os.environ
  168. return env.get(ERROR_REPORTING, default)
  169. def get_run(
  170. default: str | None = None, env: MutableMapping | None = None
  171. ) -> str | None:
  172. if env is None:
  173. env = os.environ
  174. return env.get(RUN_ID, default)
  175. def get_args(
  176. default: list[str] | None = None, env: MutableMapping | None = None
  177. ) -> list[str] | None:
  178. if env is None:
  179. env = os.environ
  180. if env.get(ARGS):
  181. try:
  182. return json.loads(env.get(ARGS, "[]")) # type: ignore
  183. except ValueError:
  184. return None
  185. else:
  186. return default or sys.argv[1:]
  187. def get_docker(
  188. default: str | None = None, env: MutableMapping | None = None
  189. ) -> str | None:
  190. if env is None:
  191. env = os.environ
  192. return env.get(DOCKER, default)
  193. def get_http_timeout(default: int = 20, env: MutableMapping | None = None) -> int:
  194. if env is None:
  195. env = os.environ
  196. return int(env.get(HTTP_TIMEOUT, default))
  197. def get_file_pusher_timeout(
  198. default: int | None = None,
  199. env: MutableMapping | None = None,
  200. ) -> int | None:
  201. if env is None:
  202. env = os.environ
  203. timeout = env.get(FILE_PUSHER_TIMEOUT, default)
  204. return int(timeout) if timeout else None
  205. def get_ignore(
  206. default: list[str] | None = None, env: MutableMapping | None = None
  207. ) -> list[str] | None:
  208. if env is None:
  209. env = os.environ
  210. ignore = env.get(IGNORE)
  211. if ignore is not None:
  212. return ignore.split(",")
  213. else:
  214. return default
  215. def get_project(
  216. default: str | None = None, env: MutableMapping | None = None
  217. ) -> str | None:
  218. if env is None:
  219. env = os.environ
  220. return env.get(PROJECT, default)
  221. def get_username(
  222. default: str | None = None, env: MutableMapping | None = None
  223. ) -> str | None:
  224. if env is None:
  225. env = os.environ
  226. return env.get(USERNAME, default)
  227. def get_user_email(
  228. default: str | None = None, env: MutableMapping | None = None
  229. ) -> str | None:
  230. if env is None:
  231. env = os.environ
  232. return env.get(USER_EMAIL, default)
  233. def get_entity(
  234. default: str | None = None, env: MutableMapping | None = None
  235. ) -> str | None:
  236. if env is None:
  237. env = os.environ
  238. return env.get(ENTITY, default)
  239. def get_organization(
  240. default: str | None = None, env: MutableMapping | None = None
  241. ) -> str | None:
  242. if env is None:
  243. env = os.environ
  244. return env.get(ORGANIZATION, default)
  245. def get_base_url(
  246. default: str | None = None, env: MutableMapping | None = None
  247. ) -> str | None:
  248. if env is None:
  249. env = os.environ
  250. return env.get(BASE_URL, default)
  251. def get_app_url(
  252. default: str | None = None, env: MutableMapping | None = None
  253. ) -> str | None:
  254. if env is None:
  255. env = os.environ
  256. return env.get(APP_URL, default)
  257. def get_show_run(default: str | None = None, env: MutableMapping | None = None) -> bool:
  258. if env is None:
  259. env = os.environ
  260. return bool(env.get(SHOW_RUN, default))
  261. def get_description(
  262. default: str | None = None, env: MutableMapping | None = None
  263. ) -> str | None:
  264. if env is None:
  265. env = os.environ
  266. return env.get(DESCRIPTION, default)
  267. def get_tags(default: str = "", env: MutableMapping | None = None) -> list[str]:
  268. if env is None:
  269. env = os.environ
  270. return [tag for tag in env.get(TAGS, default).split(",") if tag]
  271. def get_dir(
  272. default: str | None = None, env: MutableMapping | None = None
  273. ) -> str | None:
  274. if env is None:
  275. env = os.environ
  276. return env.get(DIR, default)
  277. def get_config_paths(
  278. default: str | None = None, env: MutableMapping | None = None
  279. ) -> str | None:
  280. if env is None:
  281. env = os.environ
  282. return env.get(CONFIG_PATHS, default)
  283. def get_agent_report_interval(
  284. default: str | None = None, env: MutableMapping | None = None
  285. ) -> int | None:
  286. if env is None:
  287. env = os.environ
  288. val = env.get(AGENT_REPORT_INTERVAL, default)
  289. try:
  290. val = int(val) # type: ignore
  291. except ValueError:
  292. val = None # silently ignore env format errors, caller should handle.
  293. return val
  294. def get_agent_kill_delay(
  295. default: str | None = None, env: MutableMapping | None = None
  296. ) -> int | None:
  297. if env is None:
  298. env = os.environ
  299. val = env.get(AGENT_KILL_DELAY, default)
  300. try:
  301. val = int(val) # type: ignore
  302. except ValueError:
  303. val = None # silently ignore env format errors, caller should handle.
  304. return val
  305. def get_crash_nosync_time(
  306. default: str | None = None, env: MutableMapping | None = None
  307. ) -> int | None:
  308. if env is None:
  309. env = os.environ
  310. val = env.get(CRASH_NOSYNC_TIME, default)
  311. try:
  312. val = int(val) # type: ignore
  313. except ValueError:
  314. val = None # silently ignore env format errors, caller should handle.
  315. return val
  316. def get_magic(
  317. default: str | None = None, env: MutableMapping | None = None
  318. ) -> str | None:
  319. if env is None:
  320. env = os.environ
  321. val = env.get(MAGIC, default)
  322. return val
  323. def get_data_dir(env: MutableMapping | None = None) -> str:
  324. default_dir = platformdirs.user_data_dir("wandb")
  325. if env is None:
  326. env = os.environ
  327. val = env.get(DATA_DIR, default_dir)
  328. return val
  329. def get_artifact_dir(env: MutableMapping | None = None) -> str:
  330. default_dir = os.path.join(".", "artifacts")
  331. if env is None:
  332. env = os.environ
  333. val = env.get(ARTIFACT_DIR, default_dir)
  334. return os.path.abspath(str(val))
  335. def get_artifact_fetch_file_url_batch_size(env: MutableMapping | None = None) -> int:
  336. default_batch_size = 5000
  337. if env is None:
  338. env = os.environ
  339. val = int(env.get(ARTIFACT_FETCH_FILE_URL_BATCH_SIZE, default_batch_size))
  340. return val
  341. def get_cache_dir(env: MutableMapping | None = None) -> Path:
  342. env = env or os.environ
  343. return Path(env.get(CACHE_DIR, platformdirs.user_cache_dir("wandb")))
  344. def get_use_v1_artifacts(env: MutableMapping | None = None) -> bool:
  345. if env is None:
  346. env = os.environ
  347. val = bool(env.get(USE_V1_ARTIFACTS, False))
  348. return val
  349. def get_agent_max_initial_failures(
  350. default: int | None = None, env: MutableMapping | None = None
  351. ) -> int | None:
  352. if env is None:
  353. env = os.environ
  354. val = env.get(AGENT_MAX_INITIAL_FAILURES, default)
  355. try:
  356. val = int(val) # type: ignore
  357. except ValueError:
  358. val = default
  359. return val
  360. def set_entity(value: str, env: MutableMapping | None = None) -> None:
  361. if env is None:
  362. env = os.environ
  363. env[ENTITY] = value
  364. def set_project(value: str, env: MutableMapping | None = None) -> None:
  365. if env is None:
  366. env = os.environ
  367. env[PROJECT] = value or "uncategorized"
  368. def should_save_code() -> bool:
  369. save_code = _env_as_bool(SAVE_CODE, default="False")
  370. code_disabled = _env_as_bool(DISABLE_CODE, default="False")
  371. return save_code and not code_disabled
  372. def disable_git(env: MutableMapping | None = None) -> bool:
  373. if env is None:
  374. env = os.environ
  375. val = env.get(DISABLE_GIT, "False")
  376. if isinstance(val, str):
  377. val = val.lower() != "false"
  378. return val
  379. def get_launch_queue_name(env: MutableMapping | None = None) -> str | None:
  380. if env is None:
  381. env = os.environ
  382. val = env.get(LAUNCH_QUEUE_NAME, None)
  383. return val
  384. def get_launch_queue_entity(env: MutableMapping | None = None) -> str | None:
  385. if env is None:
  386. env = os.environ
  387. val = env.get(LAUNCH_QUEUE_ENTITY, None)
  388. return val
  389. def get_launch_trace_id(env: MutableMapping | None = None) -> str | None:
  390. if env is None:
  391. env = os.environ
  392. val = env.get(LAUNCH_TRACE_ID, None)
  393. return val
  394. def get_credentials_file(default: str, env: MutableMapping | None = None) -> Path:
  395. """Retrieve the path for the credentials file used to save access tokens.
  396. The credentials file path can be set via an environment variable, otherwise
  397. the default path is used.
  398. """
  399. if env is None:
  400. env = os.environ
  401. credentials_file = env.get(CREDENTIALS_FILE, default)
  402. return Path(credentials_file)
  403. def strtobool(val: str) -> bool:
  404. """Convert a string representation of truth to true or false.
  405. Copied from distutils. distutils was removed in Python 3.12.
  406. """
  407. val = val.lower()
  408. if val in ("y", "yes", "t", "true", "on", "1"):
  409. return True
  410. elif val in ("n", "no", "f", "false", "off", "0"):
  411. return False
  412. else:
  413. raise ValueError(f"invalid truth value {val!r}")