| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- """The layer between launch sdk user code and the wandb internal process.
- If there is an active run this communication is done through the wandb run's
- backend interface.
- If there is no active run, the messages are staged on the StagedLaunchInputs
- singleton and sent when a run is created.
- """
- from __future__ import annotations
- import os
- import pathlib
- import shutil
- import tempfile
- from typing import Any
- import wandb
- import wandb.data_types
- from wandb.sdk.launch.errors import LaunchError
- from wandb.sdk.launch.inputs.schema import META_SCHEMA
- from wandb.util import get_module
- from .files import config_path_is_valid, override_file
# Separator between the keys of a nested config path (e.g. "a.b.c").
PERIOD = "."
# Escape character: a backslash placed before a period makes the period a
# literal part of the key instead of a separator.
BACKSLASH = "\\"
# Name of the subdirectory, inside the temporary directory, that holds the
# copies of launch-managed configuration files.
LAUNCH_MANAGED_CONFIGS_DIR = "_wandb_configs"
class ConfigTmpDir:
    """Singleton owning the temporary directory used for launch config files.

    Configuration files declared as launch job inputs are copied below a
    temporary directory. The first instantiation creates that directory and a
    ``LAUNCH_MANAGED_CONFIGS_DIR`` subdirectory inside it; every later
    instantiation returns the same object and reuses the same directories.
    """

    _instance = None

    def __new__(cls):
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every ConfigTmpDir() call, including the ones that
        # return the already-existing instance, so only set up once.
        if hasattr(self, "_tmp_dir"):
            return
        self._tmp_dir = tempfile.mkdtemp()
        self._configs_dir = os.path.join(self._tmp_dir, LAUNCH_MANAGED_CONFIGS_DIR)
        os.mkdir(self._configs_dir)

    @property
    def tmp_dir(self):
        """Root temporary directory, as a ``pathlib.Path``."""
        return pathlib.Path(self._tmp_dir)

    @property
    def configs_dir(self):
        """Subdirectory that holds the copied config files, as a ``pathlib.Path``."""
        return pathlib.Path(self._configs_dir)
class JobInputArguments:
    """Bundle of the values passed to the interface's ``publish_job_input``.

    Attributes are stored exactly as given; no validation or copying happens.
    """

    def __init__(
        self,
        include: list[str] | None = None,
        exclude: list[str] | None = None,
        schema: dict | None = None,
        file_path: str | None = None,
        run_config: bool | None = None,
    ):
        # Dot-separated config paths to include / exclude.
        self.include, self.exclude = include, exclude
        # Schema describing the input, if one was supplied.
        self.schema = schema
        # Either a config file path or the run-config flag identifies the input.
        self.file_path, self.run_config = file_path, run_config
class StagedLaunchInputs:
    """Singleton that collects job inputs declared while no run is active.

    Inputs are appended in declaration order and published, in that same
    order, when ``apply`` is called with a run.
    """

    _instance = None

    def __new__(cls):
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every StagedLaunchInputs() call; keep the list that
        # already exists instead of resetting it.
        if hasattr(self, "_staged_inputs"):
            return
        self._staged_inputs: list[JobInputArguments] = []

    def add_staged_input(
        self,
        input_arguments: JobInputArguments,
    ):
        """Queue ``input_arguments`` for publication once a run exists."""
        self._staged_inputs.append(input_arguments)

    def apply(self, run: wandb.Run):
        """Publish every staged input to ``run``; the staged list is left as is."""
        for staged in self._staged_inputs:
            _publish_job_input(staged, run)
def _publish_job_input(
    input: JobInputArguments,
    run: wandb.Run,
) -> None:
    """Send one job input to the backend interface of ``run``.

    When the input refers to a config file, the copy of that file under the
    ``ConfigTmpDir`` configs directory is saved to the run first. The include
    and exclude paths are then split on unescaped dots and published together
    with the schema.

    Arguments:
        input (JobInputArguments): The arguments for the job input.
        run (wandb.Run): The run to publish the job input to.
    """
    backend = run._backend
    assert backend is not None
    assert backend.interface is not None
    assert input.run_config is not None
    interface = backend.interface

    file_path = input.file_path
    if file_path:
        tmp = ConfigTmpDir()
        run.save(
            os.path.join(tmp.configs_dir, file_path),
            base_path=tmp.tmp_dir,
        )

    # Paths are split only after the file has been saved, so a malformed path
    # surfaces at the same point as before.
    include_paths = []
    if input.include:
        include_paths = [_split_on_unesc_dot(entry) for entry in input.include]
    exclude_paths = []
    if input.exclude:
        exclude_paths = [_split_on_unesc_dot(entry) for entry in input.exclude]

    interface.publish_job_input(
        include_paths=include_paths,
        exclude_paths=exclude_paths,
        input_schema=input.schema,
        run_config=input.run_config,
        file_path=file_path or "",
    )
- def _replace_refs_and_allofs(schema: dict, defs: dict | None) -> dict:
- """Recursively fix JSON schemas with common issues.
- 1. Replaces any instances of $ref with their associated definition in defs
- 2. Removes any "allOf" lists that only have one item, "lifting" the item up
- See test_internal.py for examples
- """
- ret: dict[str, Any] = {}
- if "$ref" in schema and defs:
- # Reference found, replace it with its definition
- def_key = schema.pop("$ref").split("#/$defs/")[1]
- # Also run recursive replacement in case a ref contains more refs
- ret = _replace_refs_and_allofs(defs[def_key], defs)
- for key, val in schema.items():
- if isinstance(val, dict):
- # Step into dicts recursively
- new_val_dict = _replace_refs_and_allofs(val, defs)
- ret[key] = new_val_dict
- elif isinstance(val, list):
- # Step into each item in the list
- new_val_list = []
- for item in val:
- if isinstance(item, dict):
- new_val_list.append(_replace_refs_and_allofs(item, defs))
- else:
- new_val_list.append(item)
- # Lift up allOf blocks with only one item
- if (
- key == "allOf"
- and len(new_val_list) == 1
- and isinstance(new_val_list[0], dict)
- ):
- ret.update(new_val_list[0])
- else:
- ret[key] = new_val_list
- else:
- # For anything else (str, int, etc) keep it as-is
- ret[key] = val
- return ret
def _prepare_schema(schema: Any) -> dict:
    """Prepare a schema for validation.

    This function prepares a schema for validation by:
    1. Converting a Pydantic model instance or class to a dict
    2. Replacing $ref with their associated definition in defs
    3. Removing any "allOf" lists that only have one item, "lifting" the item up

    We support both an instance of a pydantic BaseModel class (e.g. schema=MySchema(...))
    or the BaseModel class itself (e.g. schema=MySchema)

    The ``$defs`` entry is read rather than popped, so a dict passed in by the
    caller keeps its definitions and can be reused for another input.

    Arguments:
        schema (Any): A dict, or any object exposing a callable
            ``model_json_schema`` attribute.

    Raises:
        LaunchError: If the schema is not a dict after the optional conversion.

    Returns:
        dict: The schema without its ``$defs`` key, with references inlined.
    """
    if hasattr(schema, "model_json_schema") and callable(
        schema.model_json_schema  # type: ignore
    ):
        schema = schema.model_json_schema()
    if not isinstance(schema, dict):
        raise LaunchError(
            "schema must be a dict, Pydantic model instance, or Pydantic model class."
        )
    defs = schema.get("$defs")
    # Work on a copy without "$defs" instead of removing it from the input.
    without_defs = {key: val for key, val in schema.items() if key != "$defs"}
    return _replace_refs_and_allofs(without_defs, defs)
def _validate_schema(schema: dict) -> None:
    """Check ``schema`` against ``META_SCHEMA`` and warn about any violations.

    Validation uses jsonschema's Draft 2020-12 validator. Violations are
    sorted by their string form and reported through ``wandb.termwarn``; this
    function itself does not raise for an invalid schema.

    Arguments:
        schema (dict): The prepared schema to check.
    """
    jsonschema = get_module(
        "jsonschema",
        required="Setting job schema requires the jsonschema package. Please install it with `pip install 'wandb[launch]'`.",
        lazy=False,
    )
    problems = list(jsonschema.Draft202012Validator(META_SCHEMA).iter_errors(schema))
    problems.sort(key=str)
    if not problems:
        return
    wandb.termwarn(f"Schema includes unhandled or invalid configurations:\n{problems}")
def handle_config_file_input(
    path: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    schema: Any | None = None,
):
    """Declare an overridable configuration file for a launch job.

    The configuration file is copied to a temporary directory and the path to
    the copy is sent to the backend interface of the active run and used to
    configure the job builder.

    If there is no active run, the configuration file is staged and sent when a
    run is created.

    Arguments:
        path (str): Path of the configuration file. It is checked with
            ``config_path_is_valid`` and passed to ``override_file`` before
            being copied.
        include (list[str] | None): Config paths to include.
        exclude (list[str] | None): Config paths to exclude.
        schema (Any | None): Optional schema; when truthy it is prepared with
            ``_prepare_schema`` and checked with ``_validate_schema``.
    """
    config_path_is_valid(path)
    override_file(path)
    tmp_dir = ConfigTmpDir()
    dest = os.path.join(tmp_dir.configs_dir, path)
    # exist_ok avoids the check-then-create race of testing for the directory
    # first and creating it afterwards.
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    shutil.copy(path, dest)
    if schema:
        schema = _prepare_schema(schema)
        _validate_schema(schema)
    arguments = JobInputArguments(
        include=include,
        exclude=exclude,
        schema=schema,
        file_path=path,
        run_config=False,
    )
    if wandb.run is not None:
        _publish_job_input(arguments, wandb.run)
    else:
        # No run yet: keep the input until one is created.
        staged_inputs = StagedLaunchInputs()
        staged_inputs.add_staged_input(arguments)
def handle_run_config_input(
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    schema: Any | None = None,
):
    """Declare wandb.config as an overridable configuration for a launch job.

    With an active run, the include and exclude paths (and the prepared
    schema, if any) are published to that run's backend interface right away.
    Without one, they are staged on ``StagedLaunchInputs`` to be sent once a
    run is created.

    Arguments:
        include (list[str] | None): Config paths to include.
        exclude (list[str] | None): Config paths to exclude.
        schema (Any | None): Optional schema; when truthy it is prepared with
            ``_prepare_schema`` and checked with ``_validate_schema``.
    """
    prepared = schema
    if schema:
        prepared = _prepare_schema(schema)
        _validate_schema(prepared)

    arguments = JobInputArguments(
        include=include,
        exclude=exclude,
        schema=prepared,
        run_config=True,
        file_path=None,
    )

    active_run = wandb.run
    if active_run is None:
        StagedLaunchInputs().add_staged_input(arguments)
        return
    _publish_job_input(arguments, active_run)
- def _split_on_unesc_dot(path: str) -> list[str]:
- r"""Split a string on unescaped dots.
- Arguments:
- path (str): The string to split.
- Raises:
- ValueError: If the path has a trailing escape character.
- Returns:
- List[str]: The split string.
- """
- parts = []
- part = ""
- i = 0
- while i < len(path):
- if path[i] == BACKSLASH:
- if i == len(path) - 1:
- raise LaunchError(
- f"Invalid config path {path}: trailing {BACKSLASH}.",
- )
- if path[i + 1] == PERIOD:
- part += PERIOD
- i += 2
- elif path[i] == PERIOD:
- parts.append(part)
- part = ""
- i += 1
- else:
- part += path[i]
- i += 1
- if part:
- parts.append(part)
- return parts
|