beta.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. """Beta versions of wandb CLI commands.
  2. These commands are experimental and may change or be removed in future versions.
  3. """
  4. from __future__ import annotations
  5. import pathlib
  6. from typing import Any
  7. import click
  8. from wandb.analytics import get_sentry
  9. from wandb.errors import WandbCoreNotAvailableError
  10. from wandb.util import get_core_path
  11. class DefaultCommandGroup(click.Group):
  12. """A click Group that falls through to a default command.
  13. If the first argument isn't a recognized subcommand, the default
  14. command is invoked with all arguments passed through. This allows
  15. backward-compatible CLIs where `cmd [path]` and `cmd run [path]`
  16. are equivalent.
  17. """
  18. def __init__(self, *args: Any, default_cmd: str = "run", **kwargs: Any) -> None:
  19. super().__init__(*args, **kwargs)
  20. self.default_cmd = default_cmd
  21. def parse_args(self, ctx: click.Context, args: list[str]) -> list[str]:
  22. if not args or args[0].startswith("-") or args[0] not in self.commands:
  23. args = [self.default_cmd, *args]
  24. return super().parse_args(ctx, args)
  25. def format_usage(self, ctx: click.Context, formatter: click.HelpFormatter) -> None:
  26. formatter.write_usage(ctx.command_path, "[PATH] | COMMAND [ARGS]...")
  27. @click.group()
  28. def beta() -> None:
  29. """Beta versions of wandb CLI commands.
  30. These commands may change or even completely break in any release of wandb.
  31. """
  32. get_sentry().configure_scope(process_context="wandb_beta")
  33. try:
  34. get_core_path()
  35. except WandbCoreNotAvailableError as e:
  36. get_sentry().exception(f"using `wandb beta`. failed with {e}")
  37. click.secho(
  38. (e),
  39. fg="red",
  40. err=True,
  41. )
  42. @beta.group(cls=DefaultCommandGroup, default_cmd="run", invoke_without_command=True)
  43. @click.pass_context
  44. def leet(ctx: click.Context) -> None:
  45. """W&B LEET: the Lightweight Experiment Exploration Tool.
  46. A terminal UI for viewing your W&B runs locally.
  47. Examples:
  48. wandb beta leet View latest run
  49. wandb beta leet ./wandb View runs in directory
  50. wandb beta leet symon View live local system metrics
  51. """
  52. pass
  53. @leet.command()
  54. @click.argument("path", nargs=1, type=click.STRING, required=False)
  55. @click.option(
  56. "--pprof",
  57. default="",
  58. hidden=True,
  59. help="Serve /debug/pprof/* on this address (e.g. 127.0.0.1:6060).",
  60. )
  61. @click.help_option("-h", "--help")
  62. def run(path: str | None = None, pprof: str = "") -> None:
  63. """Launch the LEET TUI.
  64. LEET is a terminal UI for viewing a W&B run specified by an optional PATH.
  65. PATH can include a .wandb file or a run directory containing a .wandb file.
  66. If PATH is not provided, the command will look for the latest run.
  67. """
  68. from . import beta_leet
  69. beta_leet.launch(path, pprof)
  70. @leet.command()
  71. @click.option(
  72. "--pprof",
  73. default="",
  74. hidden=True,
  75. help="Serve /debug/pprof/* on this address (e.g. 127.0.0.1:6060).",
  76. )
  77. @click.option(
  78. "--interval",
  79. default="",
  80. metavar="DURATION",
  81. help="Sampling interval for system metrics (e.g. 500ms, 2s, 1m).",
  82. )
  83. @click.help_option("-h", "--help")
  84. def symon(pprof: str = "", interval: str = "") -> None:
  85. """Launch the standalone system monitor."""
  86. from . import beta_leet
  87. beta_leet.launch_symon(pprof=pprof, interval=interval)
  88. @leet.command()
  89. def config() -> None:
  90. """Edit LEET configuration."""
  91. from . import beta_leet
  92. beta_leet.launch_config()
  93. @beta.command()
  94. @click.argument("paths", type=click.Path(exists=True), nargs=-1)
  95. @click.option(
  96. "--live",
  97. is_flag=True,
  98. default=False,
  99. help="""Sync a run while it's still being logged.
  100. This may hang if the process generating the run crashes uncleanly.
  101. """,
  102. )
  103. @click.option(
  104. "-e",
  105. "--entity",
  106. default="",
  107. help="An entity override to use for all runs being synced.",
  108. )
  109. @click.option(
  110. "-p",
  111. "--project",
  112. default="",
  113. help="A project override to use for all runs being synced.",
  114. )
  115. @click.option(
  116. "--id",
  117. "run_id",
  118. default="",
  119. help="""A run ID override to use for all runs being synced.
  120. If setting this and syncing multiple files (with the same entity
  121. and project), the files will be synced in order of start time.
  122. This is intended to work with syncing multiple resumed fragments
  123. of the same run.
  124. """,
  125. )
  126. @click.option(
  127. "--job-type",
  128. default="",
  129. help="A job type override for all runs being synced.",
  130. )
  131. @click.option(
  132. "--replace-tags",
  133. default="",
  134. help="Rename tags using the format 'old1=new1,old2=new2'.",
  135. )
  136. @click.option(
  137. "--skip-synced/--no-skip-synced",
  138. is_flag=True,
  139. default=True,
  140. help="Skip runs that have already been synced with this command.",
  141. )
  142. @click.option(
  143. "--dry-run",
  144. is_flag=True,
  145. default=False,
  146. help="Print what would happen without uploading anything.",
  147. )
  148. @click.option(
  149. "-v",
  150. "--verbose",
  151. is_flag=True,
  152. default=False,
  153. help="Print more information.",
  154. )
  155. @click.option(
  156. "-n",
  157. default=5,
  158. help="""Max number of runs to sync at a time.
  159. When syncing multiple files that are part of the same run,
  160. the files are synced sequentially in order of start time
  161. regardless of this setting. This happens for resumed runs
  162. or when using the --id parameter.
  163. """,
  164. )
  165. def sync(
  166. paths: tuple[str, ...],
  167. live: bool,
  168. entity: str,
  169. project: str,
  170. run_id: str,
  171. job_type: str,
  172. replace_tags: str,
  173. skip_synced: bool,
  174. dry_run: bool,
  175. verbose: bool,
  176. n: int,
  177. ) -> None:
  178. """Upload .wandb files specified by PATHS.
  179. This is a beta re-implementation of `wandb sync`.
  180. It is not feature complete, not guaranteed to work, and may change
  181. in backward-incompatible ways in any release of wandb.
  182. PATHS can include .wandb files, run directories containing .wandb files,
  183. and "wandb" directories containing run directories.
  184. For example, to sync all runs in the current .wandb directory:
  185. $ wandb beta sync ./wandb
  186. To sync a specific run by specifying the run directory:
  187. $ wandb beta sync ./wandb/run-20250813_124246-n67z9ude
  188. Or equivalently:
  189. $ wandb beta sync ./wandb/run-20250813_124246-n67z9ude/run-n67z9ude.wandb
  190. """
  191. from . import beta_sync
  192. beta_sync.sync(
  193. [pathlib.Path(path) for path in paths],
  194. live=live,
  195. entity=entity,
  196. project=project,
  197. run_id=run_id,
  198. job_type=job_type,
  199. replace_tags=replace_tags,
  200. dry_run=dry_run,
  201. skip_synced=skip_synced,
  202. verbose=verbose,
  203. parallelism=n,
  204. )
# Command group for `wandb beta core`. The function body is only a docstring:
# the group performs no work of its own and exists to host the subcommands
# registered on it. click uses the docstring as the group's help text, so its
# wording is user-facing.
@beta.group()
def core() -> None:
    """Manage a shared local wandb-core service for multi-process workloads.

    wandb-core is the local backend process that handles run data,
    file uploads, and system metrics collection. By default, each
    process that calls `wandb.init()` starts its own backend. On a
    machine running many independent workers, that duplicates work
    and wastes CPU and memory.

    Use these commands to start one detached wandb-core instance and
    point multiple workers on the same machine at it with the
    WANDB_SERVICE environment variable.

    Typical workflow:
        $ wandb beta core start
        $ export WANDB_SERVICE=printed_value
        $ python -m your_launcher
        $ wandb beta core stop

    For shell scripts, capture the raw WANDB_SERVICE value from stdout:
        $ export WANDB_SERVICE="$(wandb beta core start)"

    The shared service exits after 10 minutes of idleness by default.
    Override this with --idle-timeout on the start command.
    """
  226. @core.command()
  227. @click.option(
  228. "--idle-timeout",
  229. default="10m",
  230. show_default=True,
  231. metavar="DURATION",
  232. help=(
  233. "Shut down wandb-core after this much idle time with no connected "
  234. "clients. Uses Go duration syntax, for example 30s, 10m, or 0 to "
  235. "disable idle shutdown."
  236. ),
  237. )
  238. def start(idle_timeout: str) -> None:
  239. """Start a detached wandb-core service."""
  240. from . import beta_core
  241. beta_core.start(idle_timeout=idle_timeout)
  242. @core.command()
  243. def stop() -> None:
  244. """Stop a detached wandb-core service.
  245. The service address is taken from the WANDB_SERVICE environment variable.
  246. """
  247. from . import beta_core
  248. beta_core.stop()