| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309 |
- from __future__ import annotations
- import json
- import logging
- import os
- import pathlib
- import platform
- import re
- import shutil
- import socket
- import sys
- import traceback
- from collections.abc import Sequence
- from datetime import datetime
- # Optional and Union are used for type hinting instead of | because
- # the latter is not supported in pydantic<2.6 and Python<3.10.
- # Dict, List, and Tuple are used for backwards compatibility
- # with pydantic v1 and Python<3.9.
- from typing import Any, Dict, List, Literal, Optional, Tuple, Union
- from urllib.parse import quote, unquote
- from google.protobuf.wrappers_pb2 import BoolValue, DoubleValue, Int32Value, StringValue
- from pydantic import BaseModel, ConfigDict, Field
- from typing_extensions import Self
- import wandb
- from wandb import env, util
- from wandb._pydantic import (
- IS_PYDANTIC_V2,
- AliasChoices,
- ValidationError,
- computed_field,
- field_validator,
- model_validator,
- )
- from wandb.errors import UsageError
- from wandb.proto import wandb_settings_pb2
- from wandb.sdk.lib import deprecation, settings_file, urls
- from .lib import credentials, filesystem, ipython
- from .lib.run_moment import RunMoment
- if not IS_PYDANTIC_V2:
- from pydantic import root_validator
def _path_convert(*args: str) -> str:
    """Combine path segments into one path and expand a leading ``~``.

    The segments are joined with `os.path.join`, and the joined result is
    then passed through `os.path.expanduser`.
    """
    joined = os.path.join(*args)
    return os.path.expanduser(joined)
# Names of settings that exist only on the Python side. They have no
# counterpart field on the settings proto, so they are listed here explicitly.
CLIENT_ONLY_SETTINGS = (
    "anonymous",
    "app_url_override",
    "files_dir",
    "max_end_of_run_history_metrics",
    "max_end_of_run_summary_metrics",
    "reinit",
    "x_files_dir",
    "x_sync_dir_suffix",
)
"""Python-only keys that are not fields on the settings proto."""
- class Settings(BaseModel, validate_assignment=True):
- """Settings for the W&B SDK.
- This class manages configuration settings for the W&B SDK,
- ensuring type safety and validation of all settings. Settings are accessible
- as attributes and can be initialized programmatically, through environment
- variables (`WANDB_ prefix`), and with configuration files.
- The settings are organized into three categories:
- 1. Public settings: Core configuration options that users can safely modify to customize
- W&B's behavior for their specific needs.
- 2. Internal settings: Settings prefixed with 'x_' that handle low-level SDK behavior.
- These settings are primarily for internal use and debugging. While they can be modified,
- they are not considered part of the public API and may change without notice in future
- versions.
- 3. Computed settings: Read-only settings that are automatically derived from other settings or
- the environment.
- """
- # Pydantic Model configuration.
- model_config = ConfigDict(
- extra="forbid", # throw an error if extra fields are provided
- validate_default=True, # validate default values
- use_attribute_docstrings=True, # for field descriptions
- revalidate_instances="always",
- )
- # Public settings.
- allow_media_symlink: bool = False
- """Whether to symlink media files to the run directory.
- If true, media files will be symlinked or hardlinked to the
- run directory instead of copied. This may result in faster
- logging and reduced disk usage. However, deleting or modifying
- the original files before upload to the W&B server will be
- reflected in the uploaded data.
- """
- allow_offline_artifacts: bool = True
- """Flag to allow table artifacts to be synced in offline mode.
- To revert to the old behavior, set this to False.
- """
- allow_val_change: bool = False
- """Flag to allow modification of `Config` values after they've been set."""
- anonymous: deprecation.DoNotSet = Field(
- default=deprecation.UNSET,
- exclude=True,
- )
- """Deprecated and will be removed."""
- api_key: Optional[str] = None
- """The W&B API key."""
- azure_account_url_to_access_key: Optional[Dict[str, str]] = None
- """Mapping of Azure account URLs to their corresponding access keys for Azure integration."""
- app_url_override: Optional[str] = None
- """Override for the 'app' URL for the W&B UI.
- The `app_url` is normally computed based on `base_url`, but this can be
- used to set it explicitly.
- WANDB_APP_URL is the corresponding environment variable.
- """
- base_url: str = "https://api.wandb.ai"
- """The URL of the W&B backend for data synchronization."""
- code_dir: Optional[str] = None
- """Directory containing the code to be tracked by W&B."""
- config_paths: Optional[Sequence[str]] = None
- """Paths to files to load configuration from into the `Config` object."""
- console: Literal["auto", "off", "wrap", "redirect", "wrap_raw", "wrap_emu"] = Field(
- default="auto",
- validate_default=True,
- )
- """The type of console capture to be applied.
- Possible values are:
- - "auto" - Automatically selects the console capture method based on the
- system environment and settings.
- - "off" - Disables console capture.
- - "redirect" - Redirects low-level file descriptors for capturing output.
- - "wrap" - Overrides the write methods of sys.stdout/sys.stderr. Will be
- mapped to either "wrap_raw" or "wrap_emu" based on the state of the system.
- - "wrap_raw" - Same as "wrap" but captures raw output directly instead of
- through an emulator. Derived from the `wrap` setting and should not be set manually.
- - "wrap_emu" - Same as "wrap" but captures output through an emulator.
- Derived from the `wrap` setting and should not be set manually.
- """
- console_multipart: bool = False
- """Enable multipart console logging.
- When True, the SDK writes console output to timestamped files
- under the `logs/` directory instead of a single `output.log`.
- Each part is uploaded as soon as it is closed, giving users live
- access to logs while the run is active. Rollover cadence is
- controlled by `console_chunk_max_bytes` and/or `console_chunk_max_seconds`.
- If both limits are `0`, all logs are uploaded once at run finish.
- Note: Uploaded chunks are immutable; terminal control sequences
- that modify previous lines (e.g., progress bars using carriage returns)
- only affect the current chunk.
- """
- console_chunk_max_bytes: int = 0
- """Size-based rollover threshold for multipart console logs, in bytes.
- Starts a new console log file when the current part reaches this
- size. Has an effect only when `console_multipart` is `True`.
- Can be combined with `console_chunk_max_seconds`; whichever limit is
- hit first triggers the rollover. A value of `0` disables the
- size-based limit.
- """
- console_chunk_max_seconds: int = 0
- """Time-based rollover threshold for multipart console logs, in seconds.
- Starts a new console log file after this many seconds have elapsed
- since the current part began. Requires `console_multipart` to be
- `True`. May be used with `console_chunk_max_bytes`; the first limit
- reached closes the part. A value of `0` disables the time-based
- limit.
- """
- credentials_file: str = Field(
- default_factory=lambda: str(credentials.DEFAULT_WANDB_CREDENTIALS_FILE)
- )
- """Path to file for writing temporary access tokens."""
- disable_code: bool = False
- """Whether to disable capturing the code."""
- disable_git: bool = False
- """Whether to disable capturing the git state."""
- disable_git_fork_point: bool = True
- """Whether to disable inferring fork point from remote branches
- When set to True, the SDK will use the latest commit from the upstream
- branch, if one is set. Otherwise skip generating the diff patch.
- When set to False, the SDK will try to use the latest commit from the upstream branch,
- if one is set.
- Otherwise, it will find the closest commit from all remote branches.
- This may impact performance for repos with many upstream branches.
- """
- disable_job_creation: bool = True
- """Whether to disable the creation of a job artifact for W&B Launch."""
- docker: Optional[str] = None
- """The Docker image used to execute the script."""
- email: Optional[str] = None
- """The email address of the user."""
- entity: Optional[str] = None
- """The W&B entity, such as a user or a team."""
- organization: Optional[str] = None
- """The W&B organization."""
- force: bool = False
- """Whether to pass the `force` flag to `wandb.login()`."""
- fork_from: Optional[RunMoment] = None
- """Specifies a point in a previous execution of a run to fork from.
- The point is defined by the run ID, a metric, and its value.
- Currently, only the metric '_step' is supported.
- """
- git_commit: Optional[str] = None
- """The git commit hash to associate with the run."""
- git_remote: str = "origin"
- """The git remote to associate with the run."""
- git_remote_url: Optional[str] = None
- """The URL of the git remote repository."""
- git_root: Optional[str] = None
- """Root directory of the git repository."""
- heartbeat_seconds: int = 30
- """Interval in seconds between heartbeat signals sent to the W&B servers.
- <!-- lazydoc-ignore-class-attributes -->
- """
- host: Optional[str] = None
- """Hostname of the machine running the script."""
- http_proxy: Optional[str] = None
- """Custom proxy servers for http requests to W&B."""
- https_proxy: Optional[str] = None
- """Custom proxy servers for https requests to W&B."""
- identity_token_file: Optional[str] = None
- """Path to file containing an identity token (JWT) for authentication."""
- ignore_globs: Sequence[str] = ()
- """Unix glob patterns relative to `files_dir` specifying files to exclude from upload."""
- init_timeout: float = 90.0
- """Time in seconds to wait for the `wandb.init` call to complete before timing out."""
- insecure_disable_ssl: bool = False
- """Whether to insecurely disable SSL verification."""
- job_name: Optional[str] = None
- """Name of the Launch job running the script."""
- job_source: Optional[Literal["repo", "artifact", "image"]] = None
- """Source type for Launch."""
- label_disable: bool = False
- """Whether to disable automatic labeling features."""
- launch: bool = False
- """Flag to indicate if the run is being launched through W&B Launch.
- <!-- lazydoc-ignore-class-attributes -->
- """
- launch_config_path: Optional[str] = None
- """Path to the launch configuration file."""
- login_timeout: Optional[float] = None
- """Time in seconds to wait for login operations before timing out."""
- mode: Literal["online", "offline", "shared", "disabled", "dryrun", "run"] = Field(
- default="online",
- validate_default=True,
- )
- """The operating mode for W&B logging and synchronization."""
- notebook_name: Optional[str] = None
- """Name of the notebook if running in a Jupyter-like environment."""
- program: Optional[str] = None
- """Path to the script that created the run, if available."""
- program_abspath: Optional[str] = None
- """The absolute path from the root repository directory to the script that
- created the run.
- Root repository directory is defined as the directory containing the
- .git directory, if it exists. Otherwise, it's the current working directory.
- """
- program_relpath: Optional[str] = None
- """The relative path to the script that created the run."""
- project: Optional[str] = None
- """The W&B project ID."""
- quiet: bool = False
- """Flag to suppress non-essential output."""
- reinit: Union[
- Literal[
- "default",
- "return_previous",
- "finish_previous",
- "create_new",
- ],
- bool,
- ] = "default"
- """What to do when `wandb.init()` is called while a run is active.
- Options:
- - "default": Use "finish_previous" in notebooks and "return_previous"
- otherwise.
- - "return_previous": Return the most recently created run
- that is not yet finished. This does not update `wandb.run`; see
- the "create_new" option.
- - "finish_previous": Finish all active runs, then return a new run.
- - "create_new": Create a new run without modifying other active runs.
- Does not update `wandb.run` and top-level functions like `wandb.log`.
- Because of this, some older integrations that rely on the global run
- will not work.
- Can also be a boolean, but this is deprecated. False is the same as
- "return_previous", and True is the same as "finish_previous".
- """
- relogin: bool = False
- """Flag to force a new login attempt."""
- resume: Optional[Literal["allow", "must", "never", "auto"]] = None
- """Specifies the resume behavior for the run.
- Options:
- - "must": Resumes from an existing run with the same ID. If no such run exists,
- it will result in failure.
- - "allow": Attempts to resume from an existing run with the same ID. If none is
- found, a new run will be created.
- - "never": Always starts a new run. If a run with the same ID already exists,
- it will result in failure.
- - "auto": Automatically resumes from the most recent failed run on the same
- machine.
- """
- resume_from: Optional[RunMoment] = None
- """Specifies a point in a previous execution of a run to resume from.
- The point is defined by the run ID, a metric, and its value.
- Currently, only the metric '_step' is supported.
- """
- resumed: bool = False
- """Indication from the server about the state of the run.
- This is different from resume, a user provided flag.
- <!-- lazydoc-ignore-class-attributes -->
- """
- root_dir: str = Field(default_factory=lambda: os.path.abspath(os.getcwd()))
- """The root directory to use as the base for all run-related paths.
- In particular, this is used to derive the wandb directory and the run directory.
- """
- run_group: Optional[str] = None
- """Group identifier for related runs.
- Used for grouping runs in the UI.
- """
- run_id: Optional[str] = None
- """The ID of the run."""
- run_job_type: Optional[str] = None
- """Type of job being run (e.g., training, evaluation)."""
- run_name: Optional[str] = None
- """Human-readable name for the run."""
- run_notes: Optional[str] = None
- """Additional notes or description for the run."""
- run_tags: Optional[Tuple[str, ...]] = None
- """Tags to associate with the run for organization and filtering."""
- sagemaker_disable: bool = False
- """Flag to disable SageMaker-specific functionality."""
- save_code: Optional[bool] = None
- """Whether to save the code associated with the run."""
- settings_system: Optional[str] = None
- """Path to the system-wide settings file."""
- max_end_of_run_history_metrics: int = 10
- """Maximum number of history sparklines to display at the end of a run."""
- max_end_of_run_summary_metrics: int = 10
- """Maximum number of summary metrics to display at the end of a run."""
- show_colors: Optional[bool] = None
- """Whether to use colored output in the console.
- <!-- lazydoc-ignore-class-attributes -->
- """
- show_emoji: Optional[bool] = None
- """Whether to show emoji in the console output.
- <!-- lazydoc-ignore-class-attributes -->
- """
- show_errors: bool = True
- """Whether to display error messages."""
- show_info: bool = True
- """Whether to display informational messages."""
- show_warnings: bool = True
- """Whether to display warning messages."""
- silent: bool = False
- """Flag to suppress all output."""
- start_method: Optional[str] = None
- """Method to use for starting subprocesses.
- This is deprecated and will be removed in a future release.
- <!-- lazydoc-ignore-class-attributes -->
- """
- strict: Optional[bool] = None
- """Whether to enable strict mode for validation and error checking."""
- summary_timeout: int = 60
- """Time in seconds to wait for summary operations before timing out."""
- summary_warnings: int = 5
- """Maximum number of summary warnings to display.
- <!-- lazydoc-ignore-class-attributes -->
- """
- sweep_id: Optional[str] = None
- """Identifier of the sweep this run belongs to."""
- sweep_param_path: Optional[str] = None
- """Path to the sweep parameters configuration."""
- symlink: bool = Field(default_factory=lambda: platform.system() != "Windows")
- """Whether to use symlinks (True by default except on Windows)."""
- sync_tensorboard: Optional[bool] = None
- """Whether to synchronize TensorBoard logs with W&B."""
- table_raise_on_max_row_limit_exceeded: bool = False
- """Whether to raise an exception when table row limits are exceeded."""
- use_dot_wandb: Optional[bool] = None
- """Whether to use a hidden `.wandb` or visible `wandb` directory for run data.
- If True, the SDK uses `.wandb`. If False, `wandb`.
- If not set, defaults to `.wandb` if it already exists, otherwise `wandb`.
- """
- username: Optional[str] = None
- """Username."""
- # Internal settings.
- #
- # These are typically not meant to be set by the user and should not be considered
- # a part of the public API as they may change or be removed in future versions.
- x_cli_only_mode: bool = False
- """Flag to indicate that the SDK is running in CLI-only mode.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_disable_meta: bool = False
- """Flag to disable the collection of system metadata."""
- x_disable_stats: bool = False
- """Flag to disable the collection of system metrics."""
- x_disable_viewer: bool = False
- """Flag to disable the early viewer query.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_disable_machine_info: bool = False
- """Flag to disable automatic machine info collection.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_executable: Optional[str] = None
- """Path to the Python executable.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_extra_http_headers: Optional[Dict[str, str]] = None
- """Additional headers to add to all outgoing HTTP requests."""
- x_file_stream_max_bytes: Optional[int] = None
- """An approximate maximum request size for the filestream API.
- Its purpose is to prevent HTTP requests from failing due to
- containing too much data. This number is approximate:
- requests will be slightly larger.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_stream_max_line_bytes: Optional[int] = None
- """Maximum line length for filestream JSONL files.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_stream_transmit_interval: Optional[float] = None
- """Interval in seconds between filestream transmissions.
- <!-- lazydoc-ignore-class-attributes -->
- """
- # Filestream retry client configuration.
- x_file_stream_retry_max: Optional[int] = None
- """Max number of retries for filestream operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_stream_retry_wait_min_seconds: Optional[float] = None
- """Minimum wait time between retries for filestream operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_stream_retry_wait_max_seconds: Optional[float] = None
- """Maximum wait time between retries for filestream operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_stream_timeout_seconds: Optional[float] = None
- """Timeout in seconds for individual filestream HTTP requests.
- <!-- lazydoc-ignore-class-attributes -->
- """
- # file transfer retry client configuration
- x_file_transfer_retry_max: Optional[int] = None
- """Max number of retries for file transfer operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_transfer_retry_wait_min_seconds: Optional[float] = None
- """Minimum wait time between retries for file transfer operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_transfer_retry_wait_max_seconds: Optional[float] = None
- """Maximum wait time between retries for file transfer operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_file_transfer_timeout_seconds: Optional[float] = None
- """Timeout in seconds for individual file transfer HTTP requests.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_files_dir: Optional[str] = None
- """Override setting for the computed files_dir.
- DEPRECATED, DO NOT USE. This private setting is not respected by wandb-core
- but will continue to work for some legacy Python code.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_flow_control_custom: Optional[bool] = None
- """Flag indicating custom flow control for filestream.
- TODO: Not implemented in wandb-core.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_flow_control_disabled: Optional[bool] = None
- """Flag indicating flow control is disabled for filestream.
- TODO: Not implemented in wandb-core.
- <!-- lazydoc-ignore-class-attributes -->
- """
- # graphql retry client configuration
- x_graphql_retry_max: Optional[int] = None
- """Max number of retries for GraphQL operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_graphql_retry_wait_min_seconds: Optional[float] = None
- """Minimum wait time between retries for GraphQL operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_graphql_retry_wait_max_seconds: Optional[float] = None
- """Maximum wait time between retries for GraphQL operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_graphql_timeout_seconds: Optional[float] = None
- """Timeout in seconds for individual GraphQL requests.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_internal_check_process: float = 8.0
- """Interval for internal process health checks in seconds.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_jupyter_name: Optional[str] = None
- """Name of the Jupyter notebook.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_jupyter_path: Optional[str] = None
- """Path to the Jupyter notebook.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_jupyter_root: Optional[str] = None
- """Root directory of the Jupyter notebook.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_label: Optional[str] = None
- """Label to assign to system metrics and console logs collected for the run.
- This is used to group data by on the frontend and can be used to distinguish data
- from different processes in a distributed training job.
- """
- x_live_policy_rate_limit: Optional[int] = None
- """Rate limit for live policy updates in seconds.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_live_policy_wait_time: Optional[int] = None
- """Wait time between live policy updates in seconds.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_log_level: int = logging.INFO
- """Logging level for internal operations.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_network_buffer: Optional[int] = None
- """Size of the network buffer used in flow control.
- TODO: Not implemented in wandb-core.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_primary: bool = Field(
- default=True, validation_alias=AliasChoices("x_primary", "x_primary_node")
- )
- """Determines whether to save internal wandb files and metadata.
- In a distributed setting, this is useful for avoiding file overwrites
- from secondary processes when only system metrics and logs are needed,
- as the primary process handles the main logging.
- """
- x_proxies: Optional[Dict[str, str]] = None
- """Custom proxy servers for requests to W&B.
- This is deprecated and will be removed in a future release.
- Please use `http_proxy` and `https_proxy` instead.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_runqueue_item_id: Optional[str] = None
- """ID of the Launch run queue item being processed.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_save_requirements: bool = True
- """Flag to save the requirements file."""
- x_server_side_derived_summary: bool = False
- """Flag to delegate automatic computation of summary from history to the server.
- This does not disable user-provided summary updates.
- """
- x_server_side_expand_glob_metrics: bool = True
- """Flag to delegate glob matching of metrics in define_metric to the server.
- If the server does not support this, the client will perform the glob matching.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_service_transport: Optional[str] = None
- """Transport method for communication with the wandb service.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_service_wait: float = 30.0
- """Time in seconds to wait for the wandb-core internal service to start."""
- x_skip_transaction_log: bool = False
- """Whether to skip saving the run events to the transaction log.
- This is only relevant for online runs. Can be used to reduce the amount of
- data written to disk.
- Should be used with caution, as it removes the guarantees about
- recoverability.
- """
- x_start_time: Optional[float] = None
- """The start time of the run in seconds since the Unix epoch.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_pid: int = os.getpid()
- """PID of the process that started the wandb-core process to collect system stats for.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_sampling_interval: float = Field(default=15.0)
- """Sampling interval for the system monitor in seconds."""
- x_stats_neuron_monitor_config_path: Optional[str] = None
- """Path to the default config file for the neuron-monitor tool.
- This is used to monitor AWS Trainium devices.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_dcgm_exporter: Optional[str] = None
- """Endpoint to extract Nvidia DCGM metrics from.
- Options:
- - Extract DCGM-related metrics from a query to the Prometheus `/api/v1/query` endpoint.
- It is a common practice to aggregate metrics reported by the instances of the DCGM Exporter
- running on different nodes in a cluster using Prometheus.
- - TODO: Parse metrics directly from the `/metrics` endpoint of the DCGM Exporter.
- Examples:
- - `http://localhost:9400/api/v1/query?query=DCGM_FI_DEV_GPU_TEMP{node="l1337", cluster="globular"}`.
- - TODO: `http://192.168.0.1:9400/metrics`.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_open_metrics_endpoints: Optional[Dict[str, str]] = None
- """OpenMetrics `/metrics` endpoints to monitor for system metrics."""
- x_stats_open_metrics_filters: Union[
- Dict[str, Dict[str, str]], Sequence[str], None
- ] = None
- """Filter to apply to metrics collected from OpenMetrics `/metrics` endpoints.
- Supports two formats:
- - `{"metric regex pattern, including endpoint name as prefix": {"label": "label value regex pattern"}}`
- - `("metric regex pattern 1", "metric regex pattern 2", ...)`
- """
- x_stats_open_metrics_http_headers: Optional[Dict[str, str]] = None
- """HTTP headers to add to OpenMetrics requests."""
- x_stats_disk_paths: Optional[Sequence[str]] = ("/",)
- """System paths to monitor for disk usage."""
- x_stats_cpu_count: Optional[int] = None
- """System CPU count.
- If set, overrides the auto-detected value in the run metadata.
- """
- x_stats_cpu_logical_count: Optional[int] = None
- """Logical CPU count.
- If set, overrides the auto-detected value in the run metadata.
- """
- x_stats_gpu_count: Optional[int] = None
- """GPU device count.
- If set, overrides the auto-detected value in the run metadata.
- """
- x_stats_gpu_type: Optional[str] = None
- """GPU device type.
- If set, overrides the auto-detected value in the run metadata.
- """
- x_stats_gpu_device_ids: Optional[Sequence[int]] = None
- """GPU device indices to monitor.
- If not set, the system monitor captures metrics for all GPUs.
- Assumes 0-based indexing matching CUDA/ROCm device enumeration.
- """
- x_stats_buffer_size: int = 0
- """Number of system metric samples to buffer in memory in the wandb-core process.
- Can be accessed via run._system_metrics.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_coreweave_metadata_base_url: str = "http://169.254.169.254"
- """The scheme and hostname for contacting the CoreWeave metadata server.
- Only accessible from within a CoreWeave cluster.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_coreweave_metadata_endpoint: str = "/api/v2/cloud-init/meta-data"
- """The relative path on the CoreWeave metadata server to which to make requests.
- This must not include the scheme and hostname prefix.
- Only accessible from within a CoreWeave cluster.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_stats_track_process_tree: bool = False
- """Monitor the entire process tree for resource usage, starting from `x_stats_pid`.
- When `True`, the system monitor aggregates the RSS, CPU%, and thread count
- from the process with PID `x_stats_pid` and all of its descendants.
- This can have a performance overhead and is disabled by default.
- """
- x_sync: bool = False
- """Flag to indicate whether we are syncing a run from the transaction log.
- <!-- lazydoc-ignore-class-attributes -->
- """
- x_sync_dir_suffix: str = ""
- """Suffix to add to the run's directory name (sync_dir).
- This is set in wandb.init() to avoid naming conflicts.
- If set, it is joined to the default name with a dash.
- """
- x_update_finish_state: bool = True
- """Flag to indicate whether this process can update the run's final state on the server.
- Set to False in distributed training when only the main process should determine the final state.
- """
- # Model validator to catch legacy settings.
- @model_validator(mode="before")
- @classmethod
def catch_private_settings(cls, values):
    """Map legacy underscore-prefixed keys onto their `x_` field names.

    Every key that starts with "_" is re-keyed with a leading "x"
    (for example `_foo` becomes `x_foo`); all other keys are copied as-is.
    A new dict is returned and the input is left untouched.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # Internal settings use the "x_" prefix because Pydantic does not
    # allow field names that begin with "_".
    return {
        ("x" + name if name.startswith("_") else name): values[name]
        for name in values
    }
- if IS_PYDANTIC_V2:
- @model_validator(mode="after")
- def validate_mutual_exclusion_of_branching_args(self) -> Self:
- """Check if `fork_from`, `resume`, and `resume_from` are mutually exclusive.
- <!-- lazydoc-ignore: internal -->
- """
- if (
- sum(
- o is not None
- for o in [self.fork_from, self.resume, self.resume_from]
- )
- > 1
- ):
- raise ValueError(
- "`fork_from`, `resume`, or `resume_from` are mutually exclusive. "
- "Please specify only one of them."
- )
- return self
- @model_validator(mode="after")
def validate_skip_transaction_log(self):
    """Reject `x_skip_transaction_log` when the run is offline.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: If both `_offline` and `x_skip_transaction_log` are truthy.

    <!-- lazydoc-ignore: internal -->
    """
    skipping_while_offline = self._offline and self.x_skip_transaction_log
    if skipping_while_offline:
        raise ValueError("Cannot skip transaction log in offline mode")
    return self
- else:
- @root_validator(pre=False) # type: ignore [call-overload]
- @classmethod
def validate_mutual_exclusion_of_branching_args(cls, values):
    """Ensure at most one of `fork_from`, `resume`, `resume_from` is set.

    Pydantic v1 variant that inspects the dict of field values.

    Returns:
        The `values` mapping, unchanged.

    Raises:
        ValueError: If more than one of the three keys maps to a non-None value.

    <!-- lazydoc-ignore: internal -->
    """
    branching_keys = ("fork_from", "resume", "resume_from")
    provided = [key for key in branching_keys if values.get(key) is not None]
    if len(provided) > 1:
        raise ValueError(
            "`fork_from`, `resume`, or `resume_from` are mutually exclusive. "
            "Please specify only one of them."
        )
    return values
- @root_validator(pre=False) # type: ignore [call-overload]
- @classmethod
def validate_skip_transaction_log(cls, values):
    """Reject `x_skip_transaction_log` when `_offline` is set.

    Pydantic v1 variant that inspects the dict of field values.

    Raises:
        ValueError: If both keys map to truthy values.

    <!-- lazydoc-ignore: internal -->
    """
    offline = values.get("_offline")
    if offline and values.get("x_skip_transaction_log"):
        raise ValueError("Cannot skip transaction log in offline mode")
    return values
- # Field validators.
- @field_validator("anonymous", mode="after")
- @classmethod
def validate_anonymous(cls, value: object) -> object:
    """Warn when the `anonymous` setting is supplied.

    The value is always returned unchanged. For any value other than
    `deprecation.UNSET`, a warning is emitted via `wandb.termwarn`
    with `repeat=False`.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is deprecation.UNSET:
        return value
    wandb.termwarn(
        "The anonymous setting has no effect and will be removed"
        " in a future version.",
        repeat=False,
    )
    return value
- @field_validator("api_key", mode="after")
- @classmethod
def validate_api_key(cls, value):
    """Reject API keys with leading or trailing whitespace.

    Returns:
        The value unchanged (including None).

    Raises:
        UsageError: If stripping the value makes it shorter.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return value
    if len(value.strip()) < len(value):
        raise UsageError("API key cannot start or end with whitespace")
    return value
- @field_validator("base_url", mode="after")
- @classmethod
def validate_base_url(cls, value):
    """Validate the base URL and strip trailing slashes.

    The URL is first checked with `urls.validate_url`. URLs that match the
    wandb.ai pattern get two extra checks: they must contain "api." and
    must start with "https".

    Returns:
        The URL with trailing "/" characters removed.

    Raises:
        ValueError: If a wandb.ai URL fails either extra check.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    urls.validate_url(value)

    # wandb.ai-specific checks
    targets_wandb_ai = re.match(r".*wandb\.ai[^\.]*$", value) is not None
    if targets_wandb_ai:
        if "api." not in value:
            # user might guess app.wandb.ai or wandb.ai is the default cloud server
            raise ValueError(
                f"{value} is not a valid server address, did you mean https://api.wandb.ai?"
            )
        if not value.startswith("https"):
            raise ValueError("http is not secure, please use https://api.wandb.ai")

    return value.rstrip("/")
- @field_validator("code_dir", mode="before")
- @classmethod
def validate_code_dir(cls, value):
    """Convert a `pathlib.Path` code directory to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("console", mode="after")
- @classmethod
def validate_console(cls, value, values):
    """Resolve the console capture method.

    The value "auto" is replaced with "wrap"; any other value is returned
    unchanged. The `values` argument is not used.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return "wrap" if value == "auto" else value
- @field_validator("console_chunk_max_bytes", mode="after")
- @classmethod
def validate_console_chunk_max_bytes(cls, value):
    """Require `console_chunk_max_bytes` to be non-negative.

    Raises:
        ValueError: If the value is less than zero.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    is_negative = value < 0
    if is_negative:
        raise ValueError("console_chunk_max_bytes must be non-negative")
    return value
- @field_validator("console_chunk_max_seconds", mode="after")
- @classmethod
def validate_console_chunk_max_seconds(cls, value):
    """Require `console_chunk_max_seconds` to be non-negative.

    Raises:
        ValueError: If the value is less than zero.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    is_negative = value < 0
    if is_negative:
        raise ValueError("console_chunk_max_seconds must be non-negative")
    return value
- @field_validator("x_executable", mode="before")
- @classmethod
def validate_x_executable(cls, value):
    """Convert a `pathlib.Path` executable path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("x_extra_http_headers", mode="before")
- @classmethod
def validate_x_extra_http_headers(cls, value):
    """Decode extra HTTP headers supplied as a JSON string.

    String input is parsed with `json.loads`; any other value is returned
    unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return json.loads(value) if isinstance(value, str) else value
- @field_validator("x_file_stream_max_line_bytes", mode="after")
- @classmethod
def validate_file_stream_max_line_bytes(cls, value):
    """Validate the maximum line length for filestream JSONL files.

    Returns:
        The value unchanged (including None).

    Raises:
        ValueError: If the value is not None and is less than 1.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return value
    if value < 1:
        raise ValueError("File stream max line bytes must be greater than 0")
    return value
- @field_validator("x_files_dir", mode="before")
- @classmethod
def validate_x_files_dir(cls, value):
    """Convert a `pathlib.Path` files directory to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("fork_from", mode="before")
- @classmethod
def validate_fork_from(cls, value, values) -> Optional[RunMoment]:
    """Validate the fork_from field.

    The raw value is converted with `cls._runmoment_preprocessor`. If the
    result is truthy and its `run` equals an already-provided `run_id`,
    validation fails.

    Raises:
        ValueError: If `run_id` is set and equals the run being forked from.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    run_moment = cls._runmoment_preprocessor(value)
    if not run_moment:
        return run_moment

    # pydantic v2 exposes the already-validated fields on `.data`;
    # pydantic v1 passes the mapping itself.
    known_fields = values.data if hasattr(values, "data") else values
    run_id = known_fields.get("run_id")
    if run_id is not None and run_id == run_moment.run:
        raise ValueError(
            "Provided `run_id` is the same as the run to `fork_from`. "
            "Please provide a different `run_id` or remove the `run_id` argument. "
            "If you want to rewind the current run, please use `resume_from` instead."
        )
    return run_moment
- @field_validator("http_proxy", mode="after")
- @classmethod
def validate_http_proxy(cls, value):
    """Validate the HTTP proxy URL and strip trailing slashes.

    None is returned as-is. Other values are checked with
    `urls.validate_url` and returned without trailing "/" characters.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is not None:
        urls.validate_url(value)
        return value.rstrip("/")
    return None
- @field_validator("https_proxy", mode="after")
- @classmethod
def validate_https_proxy(cls, value):
    """Validate the HTTPS proxy URL and strip trailing slashes.

    None is returned as-is. Other values are checked with
    `urls.validate_url` and returned without trailing "/" characters.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is not None:
        urls.validate_url(value)
        return value.rstrip("/")
    return None
- @field_validator("ignore_globs", mode="after")
- @classmethod
def validate_ignore_globs(cls, value):
    """Normalize the ignore globs to a tuple.

    A tuple is returned as-is; any other iterable is converted with `tuple()`.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if isinstance(value, tuple):
        return value
    return tuple(value)
- @field_validator("program", mode="before")
- @classmethod
def validate_program(cls, value):
    """Convert a `pathlib.Path` program path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("program_abspath", mode="before")
- @classmethod
def validate_program_abspath(cls, value):
    """Convert a `pathlib.Path` absolute program path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("program_relpath", mode="before")
- @classmethod
def validate_program_relpath(cls, value):
    """Convert a `pathlib.Path` relative program path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("project", mode="after")
- @classmethod
def validate_project(cls, value, values):
    """Validate the project name.

    A project name may be None, must not exceed 128 characters, and must
    not contain any of the characters `/ \\ # ? % :`. The `values` argument
    is not used.

    Returns:
        The project name unchanged (including None).

    Raises:
        UsageError: If the name is too long or contains a forbidden character.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return None

    invalid_chars_list = list("/\\#?%:")
    if len(value) > 128:
        raise UsageError(f"Invalid project name {value!r}: exceeded 128 characters")

    # Collect into a list rather than a set so the offending characters are
    # reported in a stable order; set iteration order for strings can vary
    # between interpreter runs, which made the message non-deterministic.
    invalid_chars = [char for char in invalid_chars_list if char in value]
    if invalid_chars:
        raise UsageError(
            f"Invalid project name {value!r}: "
            f"cannot contain characters {','.join(invalid_chars_list)!r}, "
            f"found {','.join(invalid_chars)!r}"
        )
    return value
- @field_validator("resume", mode="before")
- @classmethod
def validate_resume(cls, value):
    """Normalize boolean `resume` values.

    The literal `True` becomes "auto" and the literal `False` becomes None.
    The checks use identity, so any other value is returned unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is True:
        return "auto"
    return None if value is False else value
- @field_validator("resume_from", mode="before")
- @classmethod
def validate_resume_from(cls, value, values) -> Optional[RunMoment]:
    """Validate the resume_from field.

    The raw value is converted with `cls._runmoment_preprocessor`. If the
    result is truthy and an already-provided `run_id` differs from its
    `run`, validation fails.

    Raises:
        ValueError: If `run_id` is set and differs from the run in `resume_from`.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    run_moment = cls._runmoment_preprocessor(value)
    if not run_moment:
        return run_moment

    # pydantic v2 exposes the already-validated fields on `.data`;
    # pydantic v1 passes the mapping itself.
    known_fields = values.data if hasattr(values, "data") else values
    run_id = known_fields.get("run_id")
    if run_id is not None and run_id != run_moment.run:
        raise ValueError(
            "Both `run_id` and `resume_from` have been specified with different ids."
        )
    return run_moment
- @field_validator("root_dir", mode="before")
- @classmethod
def validate_root_dir(cls, value):
    """Convert a `pathlib.Path` root directory to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("run_id", mode="after")
- @classmethod
def validate_run_id(cls, value, values):
    """Validate the run ID.

    A run ID may be None. Otherwise it must be non-empty, must not consist
    solely of whitespace, must not start or end with whitespace, and must
    not contain any of the reserved characters `:;,#?/'`. The `values`
    argument is not used.

    Returns:
        The run ID unchanged (including None).

    Raises:
        UsageError: If any of the rules above is violated.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return None

    if len(value) == 0:
        raise UsageError("Run ID cannot be empty")

    stripped = value.strip()
    # The whitespace-only check must come before the leading/trailing check:
    # a whitespace-only ID also fails that check, which previously made this
    # more specific message unreachable.
    if not stripped:
        raise UsageError("Run ID cannot contain only whitespace")
    if len(value) > len(stripped):
        raise UsageError("Run ID cannot start or end with whitespace")

    # check if the run id contains any reserved characters
    reserved_chars = ":;,#?/'"
    if any(char in reserved_chars for char in value):
        raise UsageError(f"Run ID cannot contain the characters: {reserved_chars}")
    return value
- @field_validator("settings_system", mode="after")
- @classmethod
def validate_settings_system(cls, value):
    """Validate the system settings file path.

    None is returned as-is. Other values are passed through `_path_convert`;
    when the input is a `pathlib.Path`, the converted result is additionally
    wrapped in `str()`.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return None
    converted = _path_convert(value)
    return str(converted) if isinstance(value, pathlib.Path) else converted
- @field_validator("x_service_wait", mode="after")
- @classmethod
def validate_service_wait(cls, value):
    """Require the service wait time to be non-negative.

    Raises:
        UsageError: If the value is less than zero.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    is_negative = value < 0
    if is_negative:
        raise UsageError("Service wait time cannot be negative")
    return value
- @field_validator("start_method", mode="after")
- @classmethod
def validate_start_method(cls, value):
    """Warn when the deprecated `start_method` setting is supplied.

    The value is always returned unchanged. For any non-None value a
    deprecation warning is emitted via `wandb.termwarn` with `repeat=False`.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is not None:
        wandb.termwarn(
            "`start_method` is deprecated and will be removed in a future version "
            "of wandb. This setting is currently non-functional and safely ignored.",
            repeat=False,
        )
    return value
- @field_validator("x_stats_coreweave_metadata_base_url", mode="after")
- @classmethod
def validate_x_stats_coreweave_metadata_base_url(cls, value):
    """Validate the CoreWeave metadata base URL and strip trailing slashes.

    The URL is checked with `urls.validate_url` and returned without
    trailing "/" characters.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    urls.validate_url(value)
    normalized = value.rstrip("/")
    return normalized
- @field_validator("x_stats_gpu_device_ids", mode="before")
- @classmethod
def validate_x_stats_gpu_device_ids(cls, value):
    """Decode GPU device IDs supplied as a JSON string.

    String input is parsed with `json.loads`; any other value is returned
    unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return json.loads(value) if isinstance(value, str) else value
- @field_validator("x_stats_neuron_monitor_config_path", mode="before")
- @classmethod
def validate_x_stats_neuron_monitor_config_path(cls, value):
    """Convert a `pathlib.Path` neuron-monitor config path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- @field_validator("x_stats_open_metrics_endpoints", mode="before")
- @classmethod
def validate_stats_open_metrics_endpoints(cls, value):
    """Decode OpenMetrics endpoints supplied as a JSON string.

    String input is parsed with `json.loads`; any other value is returned
    unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return json.loads(value) if isinstance(value, str) else value
- @field_validator("x_stats_open_metrics_filters", mode="before")
- @classmethod
def validate_stats_open_metrics_filters(cls, value):
    """Decode OpenMetrics filters supplied as a JSON string.

    String input is parsed with `json.loads`; any other value is returned
    unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return json.loads(value) if isinstance(value, str) else value
- @field_validator("x_stats_open_metrics_http_headers", mode="before")
- @classmethod
def validate_stats_open_metrics_http_headers(cls, value):
    """Decode OpenMetrics HTTP headers supplied as a JSON string.

    String input is parsed with `json.loads`; any other value is returned
    unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    return json.loads(value) if isinstance(value, str) else value
- @field_validator("x_stats_sampling_interval", mode="after")
- @classmethod
def validate_stats_sampling_interval(cls, value):
    """Require the stats sampling interval to be at least 0.1 seconds.

    Raises:
        UsageError: If the value is less than 0.1.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    too_frequent = value < 0.1
    if too_frequent:
        raise UsageError("Stats sampling interval cannot be less than 0.1 seconds")
    return value
- @field_validator("sweep_id", mode="after")
- @classmethod
def validate_sweep_id(cls, value):
    """Validate the sweep ID.

    A sweep ID may be None. Otherwise it must be non-empty, must not consist
    solely of whitespace, and must not start or end with whitespace.

    Returns:
        The sweep ID unchanged (including None).

    Raises:
        UsageError: If any of the rules above is violated.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    if value is None:
        return None

    if len(value) == 0:
        raise UsageError("Sweep ID cannot be empty")

    stripped = value.strip()
    # The whitespace-only check must come before the leading/trailing check:
    # a whitespace-only ID also fails that check, which previously made this
    # more specific message unreachable.
    if not stripped:
        raise UsageError("Sweep ID cannot contain only whitespace")
    if len(value) > len(stripped):
        raise UsageError("Sweep ID cannot start or end with whitespace")
    return value
- @field_validator("run_tags", mode="before")
- @classmethod
def validate_run_tags(cls, value):
    """Validate run tags.

    A single string is wrapped in a one-element tuple; any other non-None
    value is converted with `tuple()`. Each tag is measured via `str(tag)`
    and must be between 1 and 64 characters long (inclusive). All problems
    are collected and reported together.

    <!-- lazydoc-ignore-classmethod: internal -->

    Args:
        value: A string, list, tuple, or None representing tags

    Returns:
        tuple: A tuple of validated tags, or None

    Raises:
        ValueError: If any tag is empty or exceeds 64 characters
    """
    if value is None:
        return None

    tags = (value,) if isinstance(value, str) else tuple(value)

    problems = []
    for index, raw_tag in enumerate(tags):
        text = str(raw_tag)
        length = len(text)
        if length == 0:
            problems.append(
                f"Tag at index {index} is empty. Tags must be between 1 and 64 characters"
            )
            continue
        if length <= 64:
            continue
        # Shorten long tags for display: first and last 20 characters.
        if length > 43:
            shown = f"{text[:20]}...{text[-20:]}"
        else:
            shown = text
        problems.append(
            f"Tag '{shown}' is {length} characters. Tags must be between 1 and 64 characters"
        )

    # Report every problem at once rather than only the first.
    if problems:
        raise ValueError("; ".join(problems))
    return tags
- @field_validator("sweep_param_path", mode="before")
- @classmethod
def validate_sweep_param_path(cls, value):
    """Convert a `pathlib.Path` sweep parameter path to `str`.

    Any other value is passed through unchanged.

    <!-- lazydoc-ignore-classmethod: internal -->
    """
    # TODO: add native support for pathlib.Path
    return str(value) if isinstance(value, pathlib.Path) else value
- # Computed fields.
- @computed_field # type: ignore[prop-decorator]
- @property
def _args(self) -> List[str]:
    """Command-line arguments after the program name.

    Returns an empty list when `_jupyter` is truthy, otherwise `sys.argv[1:]`.
    """
    return [] if self._jupyter else sys.argv[1:]
- @computed_field # type: ignore[prop-decorator]
- @property
def _aws_lambda(self) -> bool:
    """Check if we are running in a lambda environment.

    True only when `get_lambda_bootstrap()` returns a truthy object that
    has a `handle_event_request` attribute.
    """
    from sentry_sdk.integrations.aws_lambda import (  # type: ignore[import-not-found]
        get_lambda_bootstrap,
    )

    bootstrap = get_lambda_bootstrap()
    return bool(bootstrap) and hasattr(bootstrap, "handle_event_request")
- @computed_field # type: ignore[prop-decorator]
- @property
def _code_path_local(self) -> Optional[str]:
    """The relative path from the current working directory to the code path.

    For example, if the code path is /home/user/project/example.py, and the
    current working directory is /home/user/project, then the code path local
    is example.py.

    Returns None when `program` is not set; otherwise returns whatever
    `_get_program_relpath(program)` produces.
    """
    if not self.program:
        return None
    return self._get_program_relpath(self.program)
- @computed_field # type: ignore[prop-decorator]
- @property
def _colab(self) -> bool:
    """Whether the `google.colab` module has been imported."""
    colab_loaded = "google.colab" in sys.modules
    return colab_loaded
- @computed_field # type: ignore[prop-decorator]
- @property
def _ipython(self) -> bool:
    """Whether `ipython.in_ipython()` reports an IPython environment."""
    in_ipython = ipython.in_ipython()
    return in_ipython
- @computed_field # type: ignore[prop-decorator]
- @property
def _jupyter(self) -> bool:
    """Whether `ipython.in_jupyter()` reports a Jupyter environment."""
    in_jupyter = ipython.in_jupyter()
    return in_jupyter
- @computed_field # type: ignore[prop-decorator]
- @property
def _kaggle(self) -> bool:
    """Whether `util._is_likely_kaggle()` reports a Kaggle environment."""
    likely_kaggle = util._is_likely_kaggle()
    return likely_kaggle
- @computed_field # type: ignore[prop-decorator]
- @property
def _noop(self) -> bool:
    """Whether `mode` is "disabled"."""
    is_disabled = self.mode == "disabled"
    return is_disabled
- @computed_field # type: ignore[prop-decorator]
- @property
def _notebook(self) -> bool:
    """Whether any notebook-like environment is detected.

    Combines `_ipython`, `_jupyter`, `_colab` and `_kaggle` with `or`,
    evaluated in that order.
    """
    in_interactive_shell = self._ipython or self._jupyter
    return in_interactive_shell or self._colab or self._kaggle
- @computed_field # type: ignore[prop-decorator]
- @property
def _offline(self) -> bool:
    """Whether `mode` is "offline" or "dryrun"."""
    offline_modes = ("offline", "dryrun")
    return self.mode in offline_modes
- @computed_field # type: ignore[prop-decorator]
- @property
def _os(self) -> str:
    """The operating system of the machine running the script.

    This is the string returned by `platform.platform(aliased=True)`.
    """
    os_description = platform.platform(aliased=True)
    return os_description
- @computed_field # type: ignore[prop-decorator]
- @property
def _platform(self) -> str:
    """Lower-cased "<system>-<machine>" string built from the `platform` module."""
    system_name = platform.system()
    machine_name = platform.machine()
    return f"{system_name}-{machine_name}".lower()
- @computed_field # type: ignore[prop-decorator]
- @property
def _python(self) -> str:
    """Python implementation name and version, separated by a space."""
    implementation = platform.python_implementation()
    version = platform.python_version()
    return f"{implementation} {version}"
- @computed_field # type: ignore[prop-decorator]
- @property
def _shared(self) -> bool:
    """Whether we are in shared mode.

    In "shared" mode, multiple processes can write to the same run,
    for example from different machines. True exactly when `mode` is
    "shared".
    """
    is_shared = self.mode == "shared"
    return is_shared
- @computed_field # type: ignore[prop-decorator]
- @property
def _start_datetime(self) -> str:
    """The run start time formatted as `YYYYmmdd_HHMMSS`.

    Returns an empty string when `x_start_time` is None. The timestamp is
    converted with `datetime.fromtimestamp` without an explicit timezone.
    """
    started_at = self.x_start_time
    if started_at is None:
        return ""
    return datetime.fromtimestamp(started_at).strftime("%Y%m%d_%H%M%S")
- @computed_field # type: ignore[prop-decorator]
- @property
def _tmp_code_dir(self) -> str:
    """The `tmp/code` location under `sync_dir`, built with `_path_convert`."""
    run_dir = self.sync_dir
    return _path_convert(run_dir, "tmp", "code")
- @computed_field # type: ignore[prop-decorator]
- @property
def _windows(self) -> bool:
    """Whether `platform.system()` reports "Windows"."""
    system_name = platform.system()
    return system_name == "Windows"
- @computed_field # type: ignore[prop-decorator]
- @property
def app_url(self) -> str:
    """The URL for the W&B UI, usually https://wandb.ai.

    This is different from `base_url` (like https://api.wandb.ai) which
    is used to access W&B APIs programmatically.

    A truthy `app_url_override` takes precedence; otherwise the URL is
    derived from `base_url` with `util.api_to_app_url`.
    """
    override = self.app_url_override
    if override:
        return override
    return util.api_to_app_url(self.base_url)
- @computed_field # type: ignore[prop-decorator]
- @property
def colab_url(self) -> Optional[str]:
    """The URL to the Colab notebook, if running in Colab.

    Returns None unless `_colab` is truthy and `x_jupyter_path` starts
    with "fileId=". The path is percent-decoded before being appended to
    the Colab notebook URL.
    """
    if not self._colab:
        return None

    notebook_path = self.x_jupyter_path
    if not (notebook_path and notebook_path.startswith("fileId=")):
        return None

    return "https://colab.research.google.com/notebook#" + unquote(notebook_path)
- @computed_field # type: ignore[prop-decorator]
- @property
def deployment(self) -> Literal["local", "cloud"]:
    """The deployment kind: "local" when `is_local` is truthy, else "cloud"."""
    if self.is_local:
        return "local"
    return "cloud"
- @computed_field # type: ignore[prop-decorator]
- @property
def files_dir(self) -> str:
    """Absolute path to the local directory where the run's files are stored.

    A truthy `x_files_dir` is returned as-is; otherwise the path is the
    `files` entry under `sync_dir`.
    """
    # Must match the logic in settings.go in the service process.
    explicit_dir = self.x_files_dir
    if explicit_dir:
        return explicit_dir
    return _path_convert(self.sync_dir, "files")
- @computed_field # type: ignore[prop-decorator]
- @property
def is_local(self) -> bool:
    """Whether `base_url` is anything other than "https://api.wandb.ai"."""
    configured_url = str(self.base_url)
    return configured_url != "https://api.wandb.ai"
- @computed_field # type: ignore[prop-decorator]
- @property
def log_dir(self) -> str:
    """The directory for storing log files (`logs` under `sync_dir`)."""
    run_dir = self.sync_dir
    return _path_convert(run_dir, "logs")
- @computed_field # type: ignore[prop-decorator]
- @property
def log_internal(self) -> str:
    """The path to the file to use for internal logs.

    This is `debug-internal.log` under `log_dir`.
    """
    logs_root = self.log_dir
    return _path_convert(logs_root, "debug-internal.log")
- @computed_field # type: ignore[prop-decorator]
- @property
def log_symlink_internal(self) -> str:
    """The path to the symlink to the internal log file of the most recent run.

    This is `debug-internal.log` under `wandb_dir`.
    """
    base_dir = self.wandb_dir
    return _path_convert(base_dir, "debug-internal.log")
- @computed_field # type: ignore[prop-decorator]
- @property
def log_symlink_user(self) -> str:
    """The path to the symlink to the user-process log file of the most recent run.

    This is `debug.log` under `wandb_dir`.
    """
    base_dir = self.wandb_dir
    return _path_convert(base_dir, "debug.log")
- @computed_field # type: ignore[prop-decorator]
- @property
def log_user(self) -> str:
    """The path to the file to use for user-process logs.

    This is `debug.log` under `log_dir`.
    """
    logs_root = self.log_dir
    return _path_convert(logs_root, "debug.log")
- @computed_field # type: ignore[prop-decorator]
- @property
def project_url(self) -> str:
    """The W&B URL where the project can be viewed.

    Returns an empty string when `_project_url_base()` yields a falsy value.
    """
    return self._project_url_base() or ""
- @computed_field # type: ignore[prop-decorator]
- @property
def resume_fname(self) -> str:
    """The path to the resume file (`wandb-resume.json` under `wandb_dir`)."""
    base_dir = self.wandb_dir
    return _path_convert(base_dir, "wandb-resume.json")
- @computed_field # type: ignore[prop-decorator]
- @property
def run_mode(self) -> Literal["run", "offline-run"]:
    """The mode of the run. Can be either "run" or "offline-run".

    "offline-run" is returned when `_offline` is truthy.
    """
    if self._offline:
        return "offline-run"
    return "run"
- @computed_field # type: ignore[prop-decorator]
- @property
def run_url(self) -> str:
    """The W&B URL where the run can be viewed.

    Returns an empty string when either the project URL base or `run_id`
    is falsy. The run ID is percent-encoded before being appended.
    """
    base = self._project_url_base()
    run_id = self.run_id
    if not (base and run_id):
        return ""

    # Exclude specific safe characters from URL encoding to prevent 404 errors
    encoded_id = quote(run_id, safe="=+&$@")
    return f"{base}/runs/{encoded_id}"
- @computed_field # type: ignore[prop-decorator]
- @property
def settings_workspace(self) -> str:
    """The path to the workspace settings file (`settings` under `wandb_dir`)."""
    base_dir = self.wandb_dir
    return _path_convert(base_dir, "settings")
- @computed_field # type: ignore[prop-decorator]
- @property
def sweep_url(self) -> str:
    """The W&B URL where the sweep can be viewed.

    Returns an empty string when either the project URL base or `sweep_id`
    is falsy. The sweep ID is percent-encoded with `quote` defaults.
    """
    base = self._project_url_base()
    sweep_id = self.sweep_id
    if not (base and sweep_id):
        return ""
    return f"{base}/sweeps/{quote(sweep_id)}"
- @computed_field # type: ignore[prop-decorator]
- @property
def sync_dir(self) -> str:
    """The directory for storing the run's files.

    The directory name is "<run_mode>-<timespec>-<run_id>", with
    "-<x_sync_dir_suffix>" appended when that suffix is non-empty, and it
    lives under `wandb_dir`.
    """
    base_name = f"{self.run_mode}-{self.timespec}-{self.run_id}"
    suffix = self.x_sync_dir_suffix
    dir_name = f"{base_name}-{suffix}" if suffix else base_name
    return _path_convert(self.wandb_dir, dir_name)
- @computed_field # type: ignore[prop-decorator]
- @property
def sync_file(self) -> str:
    """Path to the append-only binary transaction log file.

    The file is named "run-<run_id>.wandb" and lives under `sync_dir`.
    """
    run_dir = self.sync_dir
    return _path_convert(run_dir, f"run-{self.run_id}.wandb")
- @computed_field # type: ignore[prop-decorator]
- @property
def sync_symlink_latest(self) -> str:
    """Path to the symlink to the most recent run's transaction log file.

    This is `latest-run` under `wandb_dir`.
    """
    base_dir = self.wandb_dir
    return _path_convert(base_dir, "latest-run")
- @computed_field # type: ignore[prop-decorator]
- @property
def timespec(self) -> str:
    """The time specification for the run (the value of `_start_datetime`)."""
    start_stamp = self._start_datetime
    return start_stamp
- @computed_field # type: ignore[prop-decorator]
- @property
def wandb_dir(self) -> str:
    """Full path to the wandb directory.

    The directory is named ".wandb" or "wandb" under `root_dir`. When
    `use_dot_wandb` is None, ".wandb" is chosen only if it already exists
    there; otherwise `use_dot_wandb` decides. The result has "~" expanded.
    """
    root = self.root_dir
    prefer_dot = self.use_dot_wandb
    if prefer_dot is None:
        prefer_dot = pathlib.Path(root, ".wandb").exists()

    dir_name = ".wandb" if prefer_dot else "wandb"
    return str(pathlib.Path(root, dir_name).expanduser())
- # Methods to collect and update settings from different sources.
- #
- # The Settings class does not track the source of the settings,
- # so it is up to the developer to ensure that the settings are applied
- # in the correct order. Most of the updates are done in
- # wandb/sdk/wandb_setup.py::_WandbSetup._settings_setup.
def read_system_settings(self) -> settings_file.SettingsFiles:
    """Read settings from the workspace and global settings files.

    The files are determined by the settings_system and settings_workspace
    settings. When settings_system is falsy, no global settings path is
    passed.

    The resulting object is a snapshot of the system settings at the time
    this function is used and does not reflect the settings on this Settings
    object. It can be used to update the files, and it should be short-lived
    since it does not reflect external changes to the files.

    Updating the settings files does not update this Settings instance
    and vice versa.

    <!-- lazydoc-ignore: internal -->
    """
    workspace_path = pathlib.Path(self.settings_workspace)
    system_path = self.settings_system
    return settings_file.SettingsFiles(
        global_settings=pathlib.Path(system_path) if system_path else None,
        local_settings=workspace_path,
    )
def update_from_system_settings(self) -> None:
    """Load settings from the settings files.

    Does nothing when there are no settings sources. If settings files
    contain invalid settings, prints and suppresses the error, leaving
    this instance unchanged.

    <!-- lazydoc-ignore: internal -->
    """
    system_settings = self.read_system_settings()
    sources = system_settings.sources

    if len(sources) == 0:
        return

    if len(sources) == 1:
        source_string = str(sources[0])
    else:
        source_string = "\n" + "\n".join(f" {source}" for source in sources)

    # Print at the start so that users can diagnose uncaught exceptions.
    printed_sources = not self.quiet
    if printed_sources:
        wandb.termlog(f"Loading settings from {source_string}")

    try:
        parsed_settings = _parse_system_settings(system_settings)
    except Exception as e:
        if not printed_sources:
            wandb.termerror(f"Failed to load settings from {source_string}")

        if isinstance(e, ValidationError):
            # Pydantic ValidationErrors have detailed messages that we can
            # print without a stack trace.
            wandb.termerror(str(e))
        else:
            # For all other errors, we need to dump a stack trace to make
            # sure they're debuggable.
            trace_lines = traceback.format_exception(type(e), e, e.__traceback__)
            wandb.termerror("".join(trace_lines))
        return

    # We parse and set in different steps so that we do not partially
    # apply a broken settings file.
    #
    # Note that this runs validation functions a second time, but we expect
    # them to succeed.
    self.update_from_settings(parsed_settings)
def update_from_env_vars(self, environ: Dict[str, Any]) -> None:
    """Update settings from environment variables.

    Only variables whose name starts with ``WANDB_`` are considered. A
    variable is mapped to a setting name as follows:

    - names listed in the special-case table map to the setting given there;
    - names starting with ``WANDB__`` map to ``x_`` plus the lowercased
      remainder;
    - any other name maps to the lowercased remainder after ``WANDB_``.

    Variables that do not map to an attribute present in ``self.__dict__``
    and variables whose value is ``None`` are skipped. The values for
    ``ignore_globs`` and ``run_tags`` are split on commas.

    Args:
        environ: Mapping of environment variable names to their values.

    <!-- lazydoc-ignore: internal -->
    """
    env_prefix: str = "WANDB_"
    private_env_prefix: str = env_prefix + "_"
    special_env_var_names = {
        env.APP_URL: "app_url_override",
        "WANDB_SERVICE_TRANSPORT": "x_service_transport",
        env.DIR: "root_dir",
        env.NAME: "run_name",
        env.NOTES: "run_notes",
        env.TAGS: "run_tags",
        env.JOB_TYPE: "run_job_type",
        env.HTTP_TIMEOUT: "x_graphql_timeout_seconds",
        env.FILE_PUSHER_TIMEOUT: "x_file_transfer_timeout_seconds",
        env.USER_EMAIL: "email",
    }

    for setting, value in environ.items():
        if not setting.startswith(env_prefix):
            continue

        # Skip unset values before any processing, so that the
        # comma-splitting below is never attempted on None.
        if value is None:
            continue

        if setting in special_env_var_names:
            key = special_env_var_names[setting]
        elif setting.startswith(private_env_prefix):
            key = "x_" + setting[len(private_env_prefix) :].lower()
        else:
            # otherwise, strip the prefix and convert to lowercase
            key = setting[len(env_prefix) :].lower()

        if key not in self.__dict__:
            continue

        if key in ("ignore_globs", "run_tags"):
            value = value.split(",")

        setattr(self, key, value)
def update_from_system_environment(self):
    """Fill in settings that are derived from the running system.

    Covers the code-saving and git switches read from the environment,
    Jupyter notebook information, the host name, the Python executable,
    the docker image and, unless running in CLI-only mode, the program
    name and its code paths.

    <!-- lazydoc-ignore: internal -->
    """
    # For code saving, only allow env var override if value from server is
    # true, or if no preference was specified.
    if self.save_code is True or self.save_code is None:
        if (
            os.getenv(env.SAVE_CODE) is not None
            or os.getenv(env.DISABLE_CODE) is not None
        ):
            self.save_code = env.should_save_code()

    if os.getenv(env.DISABLE_GIT) is not None:
        self.disable_git = env.disable_git()

    if self._jupyter:
        notebook_name = self.notebook_name
        if notebook_name is None or notebook_name == "":
            # Attempt to get notebook information if not already set by
            # the user.
            meta = wandb.jupyter.notebook_metadata(self.silent)  # type: ignore
            self.x_jupyter_path = meta.get("path")
            self.x_jupyter_name = meta.get("name")
            self.x_jupyter_root = meta.get("root")
        elif os.path.exists(notebook_name):
            # The user supplied a notebook path that exists on disk.
            self.x_jupyter_path = self.x_jupyter_name = notebook_name
            self.x_jupyter_root = os.getcwd()
        else:
            wandb.termwarn(
                "WANDB_NOTEBOOK_NAME should be a path to a notebook file, "
                f"couldn't find {self.notebook_name}.",
            )

    # host is populated by update_from_env_vars if the corresponding env
    # vars exist -- but if they don't, we'll fill them in here.
    if self.host is None:
        self.host = socket.gethostname()  # type: ignore

    # First non-empty candidate wins; "python3" is the last resort.
    self.x_executable = (
        self.x_executable
        or os.environ.get(env._EXECUTABLE)
        or sys.executable
        or shutil.which("python3")
        or "python3"
    )

    if self.docker is None:
        self.docker = env.get_docker(util.image_id_from_k8s())

    # Everything below is skipped in CLI-only mode.
    if self.x_cli_only_mode:
        return

    program = self.program or self._get_program()
    if program is None:
        program = "<python with no main file>"
    else:
        self._setup_code_paths(program)
    self.program = program
def update_from_dict(self, settings: Dict[str, Any]) -> None:
    """Assign every non-None entry of a dictionary as an attribute.

    Entries whose value is None are skipped, leaving the current value of
    the corresponding setting untouched.

    Args:
        settings: Mapping of setting names to their new values.

    <!-- lazydoc-ignore: internal -->
    """
    # Work on a copy so the caller's mapping is not iterated directly.
    snapshot = dict(settings)
    for name in snapshot:
        new_value = snapshot[name]
        if new_value is None:
            continue
        setattr(self, name, new_value)
def update_from_settings(self, settings: Settings) -> None:
    """Copy the explicitly set fields of another `Settings` onto this one.

    Only the fields reported by ``settings.model_fields_set`` are
    transferred; nothing happens when that set is empty.

    Args:
        settings: The `Settings` instance to read values from.

    <!-- lazydoc-ignore: internal -->
    """
    explicitly_set = {}
    for field in settings.model_fields_set:
        explicitly_set[field] = getattr(settings, field)

    if not explicitly_set:
        return

    self.update_from_dict(explicitly_set)
- # Helper methods.
def to_proto(self) -> wandb_settings_pb2.Settings:
    """Build the protobuf message that mirrors these settings.

    Settings listed in ``CLIENT_ONLY_SETTINGS`` and settings whose value is
    None are left out of the message.

    Returns:
        A ``wandb_settings_pb2.Settings`` message populated from this object.

    Raises:
        TypeError: If a setting holds a value of an unsupported type.

    <!-- lazydoc-ignore: internal -->
    """
    # Checked in order: bool must come before int because bool is a
    # subclass of int.
    scalar_wrappers = (
        (bool, BoolValue),
        (int, Int32Value),
        (float, DoubleValue),
        (str, StringValue),
    )

    proto = wandb_settings_pb2.Settings()

    for name, val in self.model_dump(exclude_none=True).items():
        if name in CLIENT_ONLY_SETTINGS:
            continue

        # Special case for x_stats_open_metrics_filters, which is either a
        # sequence or a mapping of mappings.
        if name == "x_stats_open_metrics_filters":
            if isinstance(val, (list, set, tuple)):
                getattr(proto, name).sequence.value.extend(val)
            elif isinstance(val, dict):
                filters = getattr(proto, name)
                for outer_key, inner in val.items():
                    for inner_key, inner_val in inner.items():
                        filters.mapping.value[outer_key].value[inner_key] = inner_val
            else:
                raise TypeError(f"Unsupported type {type(val)} for setting {name}")
            continue

        # Special case for RunMoment fields, which may arrive either as a
        # RunMoment or as a dict.
        if name in ("fork_from", "resume_from"):
            if isinstance(val, RunMoment):
                moment = val
            else:
                moment = RunMoment(
                    run=val.get("run"),
                    value=val.get("value"),
                    metric=val.get("metric"),
                )
            getattr(proto, name).CopyFrom(
                wandb_settings_pb2.RunMoment(
                    run=moment.run,
                    value=moment.value,
                    metric=moment.metric,
                )
            )
            continue

        wrapper = next(
            (wrap for py_type, wrap in scalar_wrappers if isinstance(val, py_type)),
            None,
        )
        if wrapper is not None:
            getattr(proto, name).CopyFrom(wrapper(value=val))
        elif isinstance(val, (list, set, tuple)):
            # we only support sequences of strings for now
            getattr(proto, name).value.extend(val)
        elif isinstance(val, dict):
            # we only support dicts with string values for now
            mapping = getattr(proto, name)
            for map_key, map_val in val.items():
                mapping.value[map_key] = map_val
        elif val is not None:
            # None means that the setting value was not set; anything else
            # reaching this point is unsupported.
            raise TypeError(f"Unsupported type {type(val)} for setting {name}")

    return proto
- def _get_program(self) -> Optional[str]:
- """Get the program that started the current process."""
- if self._jupyter:
- # If in a notebook, try to get the program from the notebook metadata.
- if self.notebook_name:
- return self.notebook_name
- if not self.x_jupyter_path:
- return self.program
- if self.x_jupyter_path.startswith("fileId="):
- return self.x_jupyter_name
- return self.x_jupyter_path
- # If not in a notebook, try to get the program from the environment
- # or the __main__ module for scripts run as `python -m ...`.
- program = os.getenv(env.PROGRAM)
- if program is not None:
- return program
- try:
- import __main__
- except ImportError:
- return None
- try:
- if __main__.__spec__ is None:
- python_args = __main__.__file__
- else:
- python_args = f"-m {__main__.__spec__.name}"
- except AttributeError:
- return None
- return python_args
- @staticmethod
- def _get_program_relpath(program: str, root: Optional[str] = None) -> Optional[str]:
- """Get the relative path to the program from the root directory."""
- if not program:
- return None
- root = root or os.getcwd()
- if not root:
- return None
- # For windows, if the root and program are on different drives,
- # os.path.relpath will raise a ValueError.
- if not filesystem.are_paths_on_same_drive(
- pathlib.Path(root), pathlib.Path(program)
- ):
- return None
- full_path_to_program = os.path.join(
- root, os.path.relpath(os.getcwd(), root), program
- )
- if os.path.exists(full_path_to_program):
- relative_path = os.path.relpath(full_path_to_program, start=root)
- if "../" in relative_path:
- return None
- return relative_path
- return None
- def _project_url_base(self) -> str:
- """Construct the base URL for the project."""
- if not all([self.entity, self.project]):
- return ""
- return f"{self.app_url}/{quote(self.entity or '')}/{quote(self.project or '')}"
@staticmethod
def _runmoment_preprocessor(
    val: Union[RunMoment, str, None],
) -> Optional[RunMoment]:
    """Normalize the value of a fork/resume setting.

    Args:
        val: A RunMoment, a URI string describing one, or None.

    Returns:
        `val` unchanged when it is None or already a RunMoment, the
        RunMoment parsed from the URI when it is a string, and None for
        any other type.
    """
    if val is None or isinstance(val, RunMoment):
        return val
    if isinstance(val, str):
        return RunMoment.from_uri(val)
    return None
if not IS_PYDANTIC_V2:
    # Pydantic v1 fallbacks for the v2 API used by the rest of the class.

    def model_copy(self, *args, **kwargs):
        """Pydantic v1 stand-in for v2's model_copy; delegates to copy()."""
        return self.copy(*args, **kwargs)

    def model_dump(self, **kwargs):
        """Pydantic v1 stand-in for v2's model_dump.

        Produces the output of dict() extended with the class's properties
        and RunMoment class attributes.

        Args:
            **kwargs: Options passed to the dict method
                - exclude_none: Whether to exclude fields with None values

        Returns:
            A dictionary of the model's fields and computed properties
        """
        # exclude_none is applied at the end instead of being forwarded to
        # dict(), so that it also covers the properties added below.
        drop_none = kwargs.pop("exclude_none", False)

        dumped = self.dict(**kwargs)

        for attr_name in dir(self.__class__):
            class_attr = getattr(self.__class__, attr_name, None)
            if isinstance(class_attr, property):
                try:
                    dumped[attr_name] = getattr(self, attr_name)
                except (AttributeError, NotImplementedError, TypeError, ValueError):
                    # Properties that cannot be evaluated are left out.
                    continue
            elif isinstance(class_attr, RunMoment):
                dumped[attr_name] = getattr(self, attr_name)

        # Special Pydantic attributes that must never appear in the dump.
        for special_name in (
            "model_config",
            "model_fields",
            "model_fields_set",
            "__fields__",
            "__model_fields_set",
            "__pydantic_self__",
            "__pydantic_initialised__",
        ):
            dumped.pop(special_name, None)

        if drop_none:
            return {key: val for key, val in dumped.items() if val is not None}
        return dumped

    @property
    def model_fields_set(self) -> set:
        """Return the set of fields that have been explicitly set.

        Pydantic v1 stand-in for v2's model_fields_set, read from
        ``__fields_set__`` and empty when that attribute is missing.
        """
        return getattr(self, "__fields_set__", set())
- def _setup_code_paths(self, program: str):
- """Sets the program_abspath and program_relpath settings."""
- if self._jupyter and self.x_jupyter_root:
- self._infer_code_paths_for_jupyter(program)
- else:
- self._infer_code_path_for_program(program)
def _infer_code_path_for_program(self, program: str):
    """Determine the program's relative and absolute paths.

    The root is the git repository root when git is enabled and a root is
    found, and the current working directory otherwise. An existing
    program_relpath is kept; program_abspath is only set when the resolved
    file exists.

    Args:
        program: Path to the program, resolved against the current working
            directory.
    """
    from .lib.gitlib import GitRepo

    try:
        if self.disable_git:
            root = os.getcwd()
        else:
            root = GitRepo().root or os.getcwd()
    except Exception:
        # if the git command fails, fall back to the current working directory
        root = os.getcwd()

    current_relpath = self.program_relpath
    if current_relpath:
        self.program_relpath = current_relpath
    else:
        self.program_relpath = self._get_program_relpath(program, root)

    cwd_from_root = os.path.relpath(os.getcwd(), root)
    candidate = os.path.abspath(os.path.join(root, cwd_from_root, program))
    if os.path.exists(candidate):
        self.program_abspath = candidate
- def _infer_code_paths_for_jupyter(self, program: str):
- """Find the notebook's absolute and relative paths.
- Since the notebook's execution environment
- is not the same as the current working directory.
- We utilize the metadata provided by the jupyter server.
- """
- if not self.x_jupyter_root or not program:
- return None
- self.program_abspath = os.path.abspath(
- os.path.join(self.x_jupyter_root, program)
- )
- self.program_relpath = program
def _parse_system_settings(
    system_settings: settings_file.SettingsFiles,
) -> Settings:
    """Build a validated Settings object from the settings files.

    The ``ignore_globs`` value is split on commas, the deprecated
    ``anonymous`` setting is replaced by ``deprecation.UNSET`` with a
    warning, and ``settings_system`` / ``root_dir`` are dropped with a
    warning. Every other entry is passed through unchanged.

    Returns:
        A validated Settings object.

    Raises:
        ValidationError: on invalid data.
        Exception: arbitrary errors can occur when constructing Settings.
    """
    not_allowed_in_file = ("settings_system", "root_dir")

    fields: dict[str, Any] = {}
    for key, raw_value in system_settings.all().items():
        if key == "ignore_globs":
            fields[key] = raw_value.split(",")
            continue

        if key == "anonymous":
            wandb.termwarn(
                "Deprecated setting 'anonymous' has no effect and will be"
                " removed in a future version of wandb."
                " Please delete it manually or by running `wandb login`"
                " to avoid errors.",
                repeat=False,
            )
            fields[key] = deprecation.UNSET
            continue

        if key in not_allowed_in_file:
            wandb.termwarn(
                f"Ignoring setting {key!r} which is not allowed in a settings file."
                " Please delete it manually to avoid errors in the future."
            )
            continue

        fields[key] = raw_value

    # NOTE: Field validators must raise ValueError for Pydantic to wrap them
    # in a ValidationError. Other kinds of errors will bubble up unaltered.
    #
    # Unfortunately, some validators return a UsageError, which has special
    # handling in the CLI and may require care to change.
    return Settings(**fields)
|