# yichael/image-match
"""Transform classes for dropout-based augmentations.

This module contains transform classes for various dropout techniques used in image
augmentation. It provides the base dropout class and specialized implementations like
PixelDropout. These transforms randomly remove or modify pixels, channels, or regions
in images, which can help models become more robust to occlusions and missing information.
"""

from __future__ import annotations

from typing import Any, Literal, cast

import numpy as np
from albucore import get_num_channels
from pydantic import Field

from albumentations.augmentations.dropout import functional as fdropout
from albumentations.augmentations.dropout.functional import (
    cutout,
    cutout_on_volume,
    cutout_on_volumes,
    filter_bboxes_by_holes,
    filter_keypoints_in_holes,
)
from albumentations.augmentations.pixel import functional as fpixel
from albumentations.core.bbox_utils import BboxProcessor, denormalize_bboxes, normalize_bboxes
from albumentations.core.keypoints_utils import KeypointsProcessor
from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
from albumentations.core.type_definitions import ALL_TARGETS, Targets

__all__ = ["PixelDropout"]


class BaseDropout(DualTransform):
    """Base class for dropout-style transformations.

    This class provides common functionality for various dropout techniques,
    including applying cutouts to images and masks. Subclasses supply the
    ``holes`` and ``seed`` parameters by implementing
    ``get_params_dependent_on_data``.

    Args:
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value to fill dropped regions.
        fill_mask (tuple[float, ...] | float | None): Value to fill
            dropped regions in the mask. If None, the mask is not modified.
        p (float): Probability of applying the transform.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>>
        >>> # Example of a custom dropout transform inheriting from BaseDropout
        >>> class CustomDropout(A.BaseDropout):
        ...     def __init__(self, num_holes_range=(4, 8), hole_size_range=(10, 20), *args, **kwargs):
        ...         super().__init__(*args, **kwargs)
        ...         self.num_holes_range = num_holes_range
        ...         self.hole_size_range = hole_size_range
        ...
        ...     def get_params_dependent_on_data(self, params, data):
        ...         img = data["image"]
        ...         height, width = img.shape[:2]
        ...
        ...         # Generate random holes (py_random follows the stdlib random.Random API,
        ...         # so randint takes exactly two inclusive bounds)
        ...         num_holes = self.py_random.randint(*self.num_holes_range)
        ...
        ...         holes = []
        ...         for _ in range(num_holes):
        ...             hole_size = self.py_random.randint(*self.hole_size_range)
        ...             # Random position for each hole
        ...             x1 = self.py_random.randint(0, max(1, width - hole_size))
        ...             y1 = self.py_random.randint(0, max(1, height - hole_size))
        ...             x2 = min(width, x1 + hole_size)
        ...             y2 = min(height, y1 + hole_size)
        ...             holes.append([x1, y1, x2, y2])
        ...
        ...         # Return holes and random seed
        ...         return {
        ...             "holes": np.array(holes) if holes else np.empty((0, 4), dtype=np.int32),
        ...             "seed": self.py_random.randint(0, 100000)
        ...         }
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.9, 0.9]])
        >>>
        >>> # Create a transform with custom dropout
        >>> transform = A.Compose([
        ...     CustomDropout(
        ...         num_holes_range=(3, 6),       # Generate 3-6 random holes
        ...         hole_size_range=(5, 15),      # Holes of size 5-15 pixels
        ...         fill=0,                       # Fill holes with black
        ...         fill_mask=1,                  # Fill mask holes with 1
        ...         p=1.0                         # Always apply for this example
        ...     )
        ... ], bbox_params=A.BboxParams(format='yolo', min_visibility=0.3))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(image=image, mask=mask, bboxes=bboxes)
        >>>
        >>> # Get the transformed data
        >>> dropout_image = transformed["image"]      # Image with random holes filled with 0
        >>> dropout_mask = transformed["mask"]        # Mask with same holes filled with 1
        >>> dropout_bboxes = transformed["bboxes"]    # Bboxes filtered by visibility threshold

    """

    _targets: tuple[Targets, ...] | Targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

    def __init__(
        self,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
        fill_mask: tuple[float, ...] | float | None,
        p: float,
    ):
        super().__init__(p=p)
        self.fill = fill  # type: ignore[assignment]
        self.fill_mask = fill_mask

    def _uses_inpainting(self) -> bool:
        """Return True when ``fill`` selects one of the inpainting modes."""
        # The isinstance check keeps the membership test from hashing a
        # non-string fill value (an unhashable one would raise TypeError).
        return isinstance(self.fill, str) and self.fill in {"inpaint_telea", "inpaint_ns"}

    @staticmethod
    def _check_inpaint_channels(num_channels: int) -> None:
        """Raise ValueError unless ``num_channels`` is 1 or 3."""
        if num_channels not in {1, 3}:
            raise ValueError("Inpainting works only for 1 or 3 channel images")

    def apply(self, img: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a single image.

        Args:
            img (np.ndarray): The image to apply the transform to.
            holes (np.ndarray): Hole regions; an empty array leaves the image untouched.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The image with holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is requested for an image that
                does not have 1 or 3 channels.

        """
        if holes.size == 0:
            return img
        if self._uses_inpainting():
            self._check_inpaint_channels(get_num_channels(img))
        return cutout(img, holes, self.fill, np.random.default_rng(seed))

    def apply_to_images(self, images: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a batch of images.

        Args:
            images (np.ndarray): The images to apply the transform to.
            holes (np.ndarray): Hole regions; an empty array leaves the batch untouched.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The images with holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is requested for images that
                do not have 1 or 3 channels.

        """
        if holes.size == 0:
            return images
        if self._uses_inpainting():
            # A 4-D batch carries channels on the last axis; anything else is treated as single-channel.
            self._check_inpaint_channels(images.shape[3] if images.ndim == 4 else 1)
        # Images (N, H, W, C) have the same structure as volumes (D, H, W, C)
        return cutout_on_volume(images, holes, self.fill, np.random.default_rng(seed))

    def apply_to_volume(self, volume: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a volume by delegating to ``apply_to_images``.

        Args:
            volume (np.ndarray): The volume to apply the transform to.
            holes (np.ndarray): Hole regions.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The volume with holes filled using ``self.fill``.

        """
        # Volume (D, H, W, C) has the same structure as images (N, H, W, C)
        # We can reuse the same logic
        return self.apply_to_images(volume, holes, seed, **params)

    def apply_to_volumes(self, volumes: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a batch of volumes.

        Args:
            volumes (np.ndarray): The volumes to apply the transform to.
            holes (np.ndarray): Hole regions; an empty array leaves the batch untouched.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The volumes with holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is requested for volumes that
                do not have 1 or 3 channels.

        """
        if holes.size == 0:
            return volumes
        if self._uses_inpainting():
            # A 5-D batch carries channels on the last axis; anything else is treated as single-channel.
            self._check_inpaint_channels(volumes.shape[4] if volumes.ndim == 5 else 1)
        return cutout_on_volumes(volumes, holes, self.fill, np.random.default_rng(seed))

    def apply_to_mask3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a 3D mask using ``self.fill_mask``.

        Args:
            mask (np.ndarray): The 3D mask to apply the transform to.
            holes (np.ndarray): Hole regions.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The mask, unchanged when ``fill_mask`` is None or there are no holes.

        """
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volume(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_masks3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a batch of 3D masks using ``self.fill_mask``.

        Args:
            mask (np.ndarray): The batch of 3D masks to apply the transform to.
            holes (np.ndarray): Hole regions.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The masks, unchanged when ``fill_mask`` is None or there are no holes.

        """
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volumes(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_mask(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the given holes out of a mask using ``self.fill_mask``.

        Args:
            mask (np.ndarray): The mask to apply the transform to.
            holes (np.ndarray): Hole regions.
            seed (int): Seed for the generator handed to the fill routine.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The mask, unchanged when ``fill_mask`` is None or there are no holes.

        """
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter bounding boxes against the holes.

        Args:
            bboxes (np.ndarray): Normalized bounding boxes.
            holes (np.ndarray): Hole regions.
            **params (Any): Additional parameters; ``params["shape"]`` supplies the image shape.

        Returns:
            np.ndarray: Normalized bounding boxes after filtering. The input is
                returned unchanged when there are no holes or no bbox processor.

        """
        if holes.size == 0:
            return bboxes
        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]
        # Filtering is done on denormalized boxes; the result is normalized again afterwards.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)
        kept_bboxes = filter_bboxes_by_holes(
            denormalized_bboxes,
            holes,
            image_shape,
            min_area=processor.params.min_area,
            min_visibility=processor.params.min_visibility,
        )

        return normalize_bboxes(kept_bboxes, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter keypoints against the holes.

        Args:
            keypoints (np.ndarray): The keypoints to apply the transform to.
            holes (np.ndarray): Hole regions.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints after ``filter_keypoints_in_holes``. The input is
                returned unchanged when there are no holes, no keypoint processor, or
                the processor's ``remove_invisible`` flag is off.

        """
        if holes.size == 0:
            return keypoints
        processor = cast("KeypointsProcessor", self.get_processor("keypoints"))

        if processor is None or not processor.params.remove_invisible:
            return keypoints

        return filter_keypoints_in_holes(keypoints, holes)

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Produce the ``holes`` and ``seed`` parameters; must be overridden.

        Args:
            params (dict[str, Any]): Transform parameters.
            data (dict[str, Any]): Input data dictionary.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        raise NotImplementedError("Subclasses must implement this method.")


class PixelDropout(DualTransform):
    """Drops random pixels from the image.

    This transform randomly sets pixels in the image to a specified value, effectively "dropping out" those pixels.
    It can be applied to both the image and its corresponding mask.

    Args:
        dropout_prob (float): Probability of dropping out each pixel. Should be in the range [0, 1].
            Default: 0.01

        per_channel (bool): If True, the dropout mask will be generated independently for each channel.
            If False, the same dropout mask will be applied to all channels.
            Default: False

        drop_value (float | tuple[float, ...] | None): Value to assign to the dropped pixels.
            If None, the value will be randomly sampled for each application:
                - For uint8 images: Random integer in [0, 255]
                - For float32 images: Random float in [0, 1]
            If a single number, that value will be used for all dropped pixels.
            If a sequence, it should contain one value per channel.
            Default: 0

        mask_drop_value (float | tuple[float, ...] | None): Value to assign to dropped pixels in the mask.
            If None, the mask will remain unchanged.
            If a single number, that value will be used for all dropped pixels in the mask.
            If a sequence, it should contain one value per channel.
            Default: None

        p (float): Probability of applying the transform. Should be in the range [0, 1].
            Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - Bounding boxes are only filtered when ``per_channel`` is False: the drop mask is handed to
          ``mask_dropout_bboxes`` together with the bbox processor's ``min_area`` and ``min_visibility``.
          With ``per_channel=True``, or when no bbox processor is configured, boxes are returned unchanged.
        - Keypoints are returned unchanged by this transform.
        - The drop mask for the mask target is sampled separately from the image drop mask.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> transform = A.PixelDropout(dropout_prob=0.1, per_channel=True, p=1.0)
        >>> result = transform(image=image, mask=mask)
        >>> dropped_image, dropped_mask = result['image'], result['mask']

    """

    class InitSchema(BaseTransformInitSchema):
        dropout_prob: float = Field(ge=0, le=1)
        per_channel: bool
        drop_value: tuple[float, ...] | float | None
        mask_drop_value: tuple[float, ...] | float | None

    _targets = ALL_TARGETS

    def __init__(
        self,
        dropout_prob: float = 0.01,
        per_channel: bool = False,
        drop_value: tuple[float, ...] | float | None = 0,
        mask_drop_value: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.dropout_prob = dropout_prob
        self.per_channel = per_channel
        self.drop_value = drop_value
        self.mask_drop_value = mask_drop_value

    def apply(
        self,
        img: np.ndarray,
        drop_mask: np.ndarray,
        drop_values: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the image.

        Args:
            img (np.ndarray): The image to apply the transform to.
            drop_mask (np.ndarray): The dropout mask.
            drop_values (np.ndarray): The values to assign to the dropped pixels.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed image.

        """
        return fpixel.pixel_dropout(img, drop_mask, drop_values)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        mask_drop_mask: np.ndarray | None,
        mask_drop_values: float | np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the mask.

        Args:
            mask (np.ndarray): The mask to apply the transform to.
            mask_drop_mask (np.ndarray | None): The dropout mask for the mask, or None
                when no mask drop parameters were generated.
            mask_drop_values (float | np.ndarray | None): The values to assign to the
                dropped pixels in the mask.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed mask, or the input mask unchanged when
                ``mask_drop_value`` is None or no drop mask is available.

        """
        # get_params_dependent_on_data emits None for the mask drop mask when it
        # found no mask array, so guard against it as well as a disabled mask_drop_value.
        if self.mask_drop_value is None or mask_drop_mask is None:
            return mask

        return fpixel.pixel_dropout(mask, mask_drop_mask, mask_drop_values)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        drop_mask: np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to apply the transform to.
            drop_mask (np.ndarray | None): The dropout mask for the bounding boxes.
            **params (Any): Additional parameters; ``params["shape"]`` supplies the image shape.

        Returns:
            np.ndarray: The transformed bounding boxes. The input is returned unchanged
                when there is no drop mask, when ``per_channel`` is True, or when no
                bbox processor is configured.

        """
        # Per-channel dropout never filters boxes, so only the shared
        # (non-per-channel) drop mask ever reaches mask_dropout_bboxes below.
        if drop_mask is None or self.per_channel:
            return bboxes

        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]

        # Filtering is done on denormalized boxes; the result is normalized again afterwards.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        result = fdropout.mask_dropout_bboxes(
            denormalized_bboxes,
            drop_mask,
            image_shape,
            processor.params.min_area,
            processor.params.min_visibility,
        )

        return normalize_bboxes(result, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Return the keypoints unchanged; pixel dropout does not alter them.

        Args:
            keypoints (np.ndarray): The keypoints to apply the transform to.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The same keypoints that were passed in.

        """
        return keypoints

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate parameters for pixel dropout based on input data.

        Args:
            params (dict[str, Any]): Transform parameters
            data (dict[str, Any]): Input data dictionary

        Returns:
            dict[str, Any]: Dictionary with ``drop_mask``, ``drop_values``,
                ``mask_drop_mask`` and ``mask_drop_values``. The two mask entries are
                None unless ``mask_drop_value`` is set and a mask array is found in ``data``.

        """
        # Use the single image when present, otherwise the first image of the batch.
        reference_array = data["image"] if "image" in data else data["images"][0]

        # Generate drop mask and values for all targets
        drop_mask = fpixel.get_drop_mask(
            reference_array.shape,
            self.per_channel,
            self.dropout_prob,
            self.random_generator,
        )
        drop_values = fpixel.prepare_drop_values(
            reference_array,
            self.drop_value,
            self.random_generator,
        )

        # Handle mask drop values if specified
        mask_drop_mask = None
        mask_drop_values = None
        mask = fpixel.get_mask_array(data)
        if self.mask_drop_value is not None and mask is not None:
            mask_drop_mask = fpixel.get_drop_mask(
                mask.shape,
                self.per_channel,
                self.dropout_prob,
                self.random_generator,
            )
            mask_drop_values = fpixel.prepare_drop_values(
                mask,
                self.mask_drop_value,
                self.random_generator,
            )

        return {
            "drop_mask": drop_mask,
            "drop_values": drop_values,
            "mask_drop_mask": mask_drop_mask,
            "mask_drop_values": mask_drop_values,
        }