docker_builder.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. """Implementation of the docker builder."""
  2. from __future__ import annotations
  3. import logging
  4. import os
  5. from typing import Any
  6. import wandb
  7. import wandb.docker as docker
  8. from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
  9. from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
  10. from wandb.sdk.launch.environment.abstract import AbstractEnvironment
  11. from wandb.sdk.launch.registry.abstract import AbstractRegistry
  12. from .._project_spec import EntryPoint, LaunchProject
  13. from ..errors import LaunchDockerError, LaunchError
  14. from ..registry.anon import AnonynmousRegistry
  15. from ..registry.local_registry import LocalRegistry
  16. from ..utils import (
  17. LOG_PREFIX,
  18. event_loop_thread_exec,
  19. warn_failed_packages_from_build_logs,
  20. )
  21. from .build import _WANDB_DOCKERFILE_NAME, validate_docker_installation
  22. from .context_manager import BuildContextManager
# Module-level logger, named after this module so records can be filtered by it.
_logger = logging.getLogger(__name__)
  24. class DockerBuilder(AbstractBuilder):
  25. """Builds a docker image for a project.
  26. Attributes:
  27. builder_config (Dict[str, Any]): The builder config.
  28. """
  29. builder_type = "docker"
  30. target_platform = "linux/amd64"
  31. def __init__(
  32. self,
  33. environment: AbstractEnvironment,
  34. registry: AbstractRegistry,
  35. config: dict[str, Any],
  36. ):
  37. """Initialize a DockerBuilder.
  38. Arguments:
  39. environment (AbstractEnvironment): The environment to use.
  40. registry (AbstractRegistry): The registry to use.
  41. Raises:
  42. LaunchError: If docker is not installed
  43. """
  44. self.environment = environment # Docker builder doesn't actually use this.
  45. self.registry = registry
  46. self.config = config
  47. @classmethod
  48. def from_config(
  49. cls,
  50. config: dict[str, Any],
  51. environment: AbstractEnvironment,
  52. registry: AbstractRegistry,
  53. ) -> DockerBuilder:
  54. """Create a DockerBuilder from a config.
  55. Arguments:
  56. config (Dict[str, Any]): The config.
  57. registry (AbstractRegistry): The registry to use.
  58. verify (bool, optional): Whether to verify the functionality of the builder.
  59. login (bool, optional): Whether to login to the registry.
  60. Returns:
  61. DockerBuilder: The DockerBuilder.
  62. """
  63. # If the user provided a destination URI in the builder config
  64. # we use that as the registry.
  65. image_uri = config.get("destination")
  66. if image_uri:
  67. if registry is not None:
  68. wandb.termwarn(
  69. f"{LOG_PREFIX}Overriding registry from registry config"
  70. f" with {image_uri} from builder config."
  71. )
  72. registry = registry_from_uri(image_uri)
  73. return cls(environment, registry, config)
  74. async def verify(self) -> None:
  75. """Verify the builder."""
  76. await validate_docker_installation()
  77. async def login(self) -> None:
  78. """Login to the registry."""
  79. if isinstance(self.registry, LocalRegistry):
  80. _logger.info(f"{LOG_PREFIX}No registry configured, skipping login.")
  81. elif isinstance(self.registry, AnonynmousRegistry):
  82. _logger.info(f"{LOG_PREFIX}Anonymous registry, skipping login.")
  83. else:
  84. username, password = await self.registry.get_username_password()
  85. login = event_loop_thread_exec(docker.login)
  86. await login(username, password, self.registry.uri)
  87. async def build_image(
  88. self,
  89. launch_project: LaunchProject,
  90. entrypoint: EntryPoint,
  91. job_tracker: JobAndRunStatusTracker | None = None,
  92. ) -> str:
  93. """Build the image for the given project.
  94. Arguments:
  95. launch_project (LaunchProject): The project to build.
  96. entrypoint (EntryPoint): The entrypoint to use.
  97. """
  98. await self.verify()
  99. await self.login()
  100. build_context_manager = BuildContextManager(launch_project=launch_project)
  101. build_ctx_path, image_tag = build_context_manager.create_build_context("docker")
  102. dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
  103. repository = None if not self.registry else await self.registry.get_repo_uri()
  104. # if repo is set, use the repo name as the image name
  105. if repository:
  106. image_uri = f"{repository}:{image_tag}"
  107. # otherwise, base the image name off of the source
  108. # which the launch_project checks in image_name
  109. else:
  110. image_uri = f"{launch_project.image_name}:{image_tag}"
  111. if (
  112. not launch_project.build_required()
  113. and await self.registry.check_image_exists(image_uri)
  114. ):
  115. return image_uri
  116. _logger.info(
  117. f"image {image_uri} does not already exist in repository, building."
  118. )
  119. try:
  120. output = await event_loop_thread_exec(docker.build)(
  121. tags=[image_uri],
  122. file=dockerfile,
  123. context_path=build_ctx_path,
  124. platform=self.config.get("platform"),
  125. )
  126. warn_failed_packages_from_build_logs(
  127. output, image_uri, launch_project.api, job_tracker
  128. )
  129. except docker.DockerError as e:
  130. if job_tracker:
  131. job_tracker.set_err_stage("build")
  132. raise LaunchDockerError(f"Error communicating with docker client: {e}")
  133. try:
  134. os.remove(build_ctx_path)
  135. except Exception:
  136. _msg = f"{LOG_PREFIX}Temporary docker context file {build_ctx_path} was not deleted."
  137. _logger.info(_msg)
  138. if repository:
  139. reg, tag = image_uri.split(":")
  140. wandb.termlog(f"{LOG_PREFIX}Pushing image {image_uri}")
  141. push_resp = await event_loop_thread_exec(docker.push)(reg, tag)
  142. if push_resp is None:
  143. raise LaunchError("Failed to push image to repository")
  144. elif (
  145. launch_project.resource == "sagemaker"
  146. and f"The push refers to repository [{repository}]" not in push_resp
  147. ):
  148. raise LaunchError(f"Unable to push image to ECR, response: {push_resp}")
  149. return image_uri