from __future__ import annotations

import re
from base64 import urlsafe_b64encode
from typing import Any, Final
from zlib import crc32

from wandb.sdk.artifacts.artifact import Artifact

PLACEHOLDER: Final[str] = "PLACEHOLDER"


def sanitize_artifact_name(name: str) -> str:
    """Return `name` restricted to the characters allowed in artifact names.

    Every character other than ASCII letters, digits, `_`, `-` and `.` is
    removed. When nothing had to be removed, `name` is returned unchanged.
    Otherwise a `-` followed by a short suffix derived from the original
    name is appended, so that different inputs which collapse to the same
    cleaned text are still unlikely to produce the same result.

    Args:
        name: The requested artifact name.

    Returns:
        `name` itself if it only contains allowed characters, else
        `"<cleaned>-<suffix>"`.
    """
    cleaned = re.sub(r"[^a-zA-Z0-9_\-.]+", "", name)

    # Names that already satisfy the constraints pass through untouched.
    if cleaned == name:
        return name

    # Derive the suffix from a CRC-32 of the *original* name.  CRC is a
    # checksum rather than a general-purpose hash, but 32 bits (4B+ values)
    # rendered as url-safe base64 is compact and sufficient for the
    # corner-case names handled here.
    checksum: int = crc32(name.encode("utf-8")) & 0xFFFFFFFF  # force unsigned
    encoded = urlsafe_b64encode(checksum.to_bytes(4, byteorder="big"))

    # 4 bytes encode to 8 base64 characters, 2 of which are `=` padding;
    # dropping the padding leaves a 6-character suffix.
    tag = encoded.rstrip(b"=").decode("ascii")
    return f"{cleaned}-{tag}"


class InternalArtifact(Artifact):
    """An `Artifact` reserved for internal use.

    This covers artifacts of type `job`, `code` (collection names prefixed
    with `source-`), `run_table` (collection names prefixed with `run-`),
    and every type whose name begins with `wandb-`.

    Not meant to be instantiated by users directly.
    """

    def __init__(
        self,
        name: str,
        type: str,
        description: str | None = None,
        metadata: dict[str, Any] | None = None,
        incremental: bool = False,
        use_as: str | None = None,
    ) -> None:
        """Create the artifact under a sanitized version of `name`.

        Args:
            name: Requested artifact name; it is passed through
                `sanitize_artifact_name` before reaching the parent class.
            type: The artifact type to store on the instance.
            description: Forwarded unchanged to `Artifact.__init__`.
            metadata: Forwarded unchanged to `Artifact.__init__`.
            incremental: Forwarded unchanged to `Artifact.__init__`.
            use_as: Forwarded unchanged to `Artifact.__init__`.
        """
        super().__init__(
            sanitize_artifact_name(name),
            PLACEHOLDER,
            description,
            metadata,
            incremental,
            use_as,
        )
        # The parent is initialised with the placeholder type and the real
        # one is assigned afterwards.
        # NOTE(review): presumably this sidesteps checks in
        # `Artifact.__init__` that would reject the reserved internal
        # types -- confirm against the parent class.
        self._type = type