_internal_artifact.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. from __future__ import annotations
  2. import re
  3. from base64 import urlsafe_b64encode
  4. from typing import Any, Final
  5. from zlib import crc32
  6. from wandb.sdk.artifacts.artifact import Artifact
# Stand-in value passed as the artifact type to `Artifact.__init__` by
# `InternalArtifact`, which then overwrites `_type` with the real type.
PLACEHOLDER: Final[str] = "PLACEHOLDER"
  8. def sanitize_artifact_name(name: str) -> str:
  9. """Sanitize the string to satisfy constraints on artifact names."""
  10. # If the name is already sanitized, don't change it.
  11. if (sanitized := re.sub(r"[^a-zA-Z0-9_\-.]+", "", name)) == name:
  12. return name
  13. # Append a short alphanumeric suffix to maintain uniqueness.
  14. # Yes, CRC is meant for checksums and not as a general hash function, but
  15. # a 32-bit CRC hash, encoded as (url-safe) base64, is fairly short while
  16. # providing 4B+ possible values, which should be good enough for the corner
  17. # case names this function is meant to address.
  18. #
  19. # As implemented, the final suffix should be 6 characters.
  20. crc: int = crc32(name.encode("utf-8")) & 0xFFFFFFFF # Ensure it's unsigned
  21. crc_bytes = crc.to_bytes(4, byteorder="big")
  22. suffix = urlsafe_b64encode(crc_bytes).rstrip(b"=").decode("ascii")
  23. return f"{sanitized}-{suffix}"
  24. class InternalArtifact(Artifact):
  25. """An Artifact intended for internal use only.
  26. Includes artifacts of type `job`, `code` (with a `source-` collection name
  27. prefix), `run_table` (with a `run-` collection name prefix), and any type that starts
  28. with `wandb-`. Users should not use this class directly.
  29. """
  30. def __init__(
  31. self,
  32. name: str,
  33. type: str,
  34. description: str | None = None,
  35. metadata: dict[str, Any] | None = None,
  36. incremental: bool = False,
  37. use_as: str | None = None,
  38. ) -> None:
  39. sanitized_name = sanitize_artifact_name(name)
  40. super().__init__(
  41. sanitized_name, PLACEHOLDER, description, metadata, incremental, use_as
  42. )
  43. self._type = type