| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- from __future__ import annotations
- import re
- from base64 import urlsafe_b64encode
- from typing import Any, Final
- from zlib import crc32
- from wandb.sdk.artifacts.artifact import Artifact
- PLACEHOLDER: Final[str] = "PLACEHOLDER"
def sanitize_artifact_name(name: str) -> str:
    """Return `name` reduced to characters permitted in artifact names.

    Only ASCII letters, digits, underscores, hyphens, and dots are kept.
    A name that already consists solely of those characters is returned
    unchanged. Otherwise the disallowed characters are dropped and a
    hyphen plus a 6-character suffix derived from the *original* name is
    appended, so that different inputs which strip down to the same text
    still yield different results.

    Args:
        name: The requested artifact name.

    Returns:
        The original name if nothing had to be removed, else
        `"<stripped name>-<suffix>"`.
    """
    cleaned = re.sub(r"[^a-zA-Z0-9_\-.]+", "", name)
    if cleaned == name:
        # Nothing was stripped, so leave the name alone.
        return name

    # CRC-32 is a checksum rather than a general-purpose hash, but 32 bits
    # (4B+ values) is ample for the corner-case names handled here, and it
    # keeps the suffix short. The mask guarantees an unsigned value.
    checksum: int = crc32(name.encode("utf-8")) & 0xFFFFFFFF

    # 4 bytes -> 8 url-safe base64 characters, of which the last two are "="
    # padding; dropping the padding leaves exactly 6 characters.
    encoded = urlsafe_b64encode(checksum.to_bytes(4, byteorder="big"))
    tag = encoded.decode("ascii").rstrip("=")

    return f"{cleaned}-{tag}"
class InternalArtifact(Artifact):
    """Artifact subclass reserved for internal use.

    Covers artifacts of type `job`, `code` (collection names prefixed with
    `source-`), `run_table` (collection names prefixed with `run-`), and
    every type beginning with `wandb-`. Not meant to be instantiated by
    users directly.
    """

    def __init__(
        self,
        name: str,
        type: str,
        description: str | None = None,
        metadata: dict[str, Any] | None = None,
        incremental: bool = False,
        use_as: str | None = None,
    ) -> None:
        """Create the artifact under a sanitized name and the given type.

        The name is passed through `sanitize_artifact_name` before it
        reaches the base-class constructor.
        """
        # The base constructor receives PLACEHOLDER instead of the real
        # type; the real one is written to `_type` afterwards.
        # NOTE(review): presumably this sidesteps type validation in
        # `Artifact.__init__` for reserved internal types -- confirm.
        super().__init__(
            sanitize_artifact_name(name),
            PLACEHOLDER,
            description,
            metadata,
            incremental,
            use_as,
        )
        self._type = type
|