| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461 |
- """Transform classes for dropout-based augmentations.
- This module contains transform classes for various dropout techniques used in image
- augmentation. It provides the base dropout class and specialized implementations like
- PixelDropout. These transforms randomly remove or modify pixels, channels, or regions
- in images, which can help models become more robust to occlusions and missing information.
- """
- from __future__ import annotations
- from typing import Any, Literal, cast
- import numpy as np
- from albucore import get_num_channels
- from pydantic import Field
- from albumentations.augmentations.dropout import functional as fdropout
- from albumentations.augmentations.dropout.functional import (
- cutout,
- cutout_on_volume,
- cutout_on_volumes,
- filter_bboxes_by_holes,
- filter_keypoints_in_holes,
- )
- from albumentations.augmentations.pixel import functional as fpixel
- from albumentations.core.bbox_utils import BboxProcessor, denormalize_bboxes, normalize_bboxes
- from albumentations.core.keypoints_utils import KeypointsProcessor
- from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
- from albumentations.core.type_definitions import ALL_TARGETS, Targets
- __all__ = ["PixelDropout"]
class BaseDropout(DualTransform):
    """Base class for dropout-style transformations.

    This class provides common functionality for various dropout techniques,
    including applying cutouts to images and masks. Subclasses supply the regions
    to drop by implementing ``get_params_dependent_on_data`` so that it returns a
    dict with a ``"holes"`` array and an integer ``"seed"``; every ``apply*`` method
    in this class consumes those two parameters.

    Args:
        fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
            Value to fill dropped regions.
        fill_mask (tuple[float, ...] | float | None): Value to fill
            dropped regions in the mask. If None, the mask is not modified.
        p (float): Probability of applying the transform.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>>
        >>> # Example of a custom dropout transform inheriting from BaseDropout
        >>> class CustomDropout(A.BaseDropout):
        ...     def __init__(self, num_holes_range=(4, 8), hole_size_range=(10, 20), *args, **kwargs):
        ...         super().__init__(*args, **kwargs)
        ...         self.num_holes_range = num_holes_range
        ...         self.hole_size_range = hole_size_range
        ...
        ...     def get_params_dependent_on_data(self, params, data):
        ...         img = data["image"]
        ...         height, width = img.shape[:2]
        ...
        ...         # Generate random holes
        ...         num_holes = self.py_random.randint(*self.num_holes_range)
        ...         hole_sizes = [self.py_random.randint(*self.hole_size_range) for _ in range(num_holes)]
        ...
        ...         holes = []
        ...         for size in hole_sizes:
        ...             # Random position for each hole
        ...             x1 = self.py_random.randint(0, max(1, width - size))
        ...             y1 = self.py_random.randint(0, max(1, height - size))
        ...             x2 = min(width, x1 + size)
        ...             y2 = min(height, y1 + size)
        ...             holes.append([x1, y1, x2, y2])
        ...
        ...         # Return holes and random seed
        ...         return {
        ...             "holes": np.array(holes) if holes else np.empty((0, 4), dtype=np.int32),
        ...             "seed": int(self.random_generator.integers(0, 100000)),
        ...         }
        >>>
        >>> # Prepare sample data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.9, 0.9]])
        >>>
        >>> # Create a transform with custom dropout
        >>> transform = A.Compose([
        ...     CustomDropout(
        ...         num_holes_range=(3, 6),        # Generate 3-6 random holes
        ...         hole_size_range=(5, 15),       # Holes of size 5-15 pixels
        ...         fill=0,                        # Fill holes with black
        ...         fill_mask=1,                   # Fill mask holes with 1
        ...         p=1.0                          # Always apply for this example
        ...     )
        ... ], bbox_params=A.BboxParams(format='yolo', min_visibility=0.3))
        >>>
        >>> # Apply the transform
        >>> transformed = transform(image=image, mask=mask, bboxes=bboxes)
        >>>
        >>> # Get the transformed data
        >>> dropout_image = transformed["image"]    # Image with random holes filled with 0
        >>> dropout_mask = transformed["mask"]      # Mask with same holes filled with 1
        >>> dropout_bboxes = transformed["bboxes"]  # Bboxes filtered by visibility threshold

    """

    _targets: tuple[Targets, ...] | Targets = ALL_TARGETS

    class InitSchema(BaseTransformInitSchema):
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]
        fill_mask: tuple[float, ...] | float | None

    def __init__(
        self,
        fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
        fill_mask: tuple[float, ...] | float | None,
        p: float,
    ):
        super().__init__(p=p)
        self.fill = fill  # type: ignore[assignment]
        self.fill_mask = fill_mask

    def _uses_inpainting(self) -> bool:
        """Return True when ``fill`` selects one of the inpainting modes."""
        # The isinstance guard keeps an unhashable fill (e.g. a list) from raising
        # TypeError in the set membership test.
        return isinstance(self.fill, str) and self.fill in {"inpaint_telea", "inpaint_ns"}

    @staticmethod
    def _check_inpaint_channels(num_channels: int) -> None:
        """Raise ValueError unless ``num_channels`` is 1 or 3."""
        if num_channels not in {1, 3}:
            raise ValueError("Inpainting works only for 1 or 3 channel images")

    def apply(self, img: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the holes out of a single image.

        Args:
            img (np.ndarray): Image to modify.
            holes (np.ndarray): Regions to drop; an empty array leaves the image untouched.
            seed (int): Seed for the generator handed to ``cutout``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The image with the holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is selected and the image does not
                have 1 or 3 channels.

        """
        if holes.size == 0:
            return img
        if self._uses_inpainting():
            self._check_inpaint_channels(get_num_channels(img))
        return cutout(img, holes, self.fill, np.random.default_rng(seed))

    def apply_to_images(self, images: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the same holes out of every image in a batch.

        Args:
            images (np.ndarray): Batch of images; a 4-D input is read as having its
                channels on the last axis, anything else is treated as single-channel.
            holes (np.ndarray): Regions to drop; an empty array leaves the batch untouched.
            seed (int): Seed for the generator handed to ``cutout_on_volume``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The batch with the holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is selected and the channel count is not 1 or 3.

        """
        if holes.size == 0:
            return images
        if self._uses_inpainting():
            self._check_inpaint_channels(images.shape[3] if images.ndim == 4 else 1)
        # Images (N, H, W, C) have the same structure as volumes (D, H, W, C)
        return cutout_on_volume(images, holes, self.fill, np.random.default_rng(seed))

    def apply_to_volume(self, volume: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the holes out of a volume by delegating to ``apply_to_images``."""
        # Volume (D, H, W, C) has the same structure as images (N, H, W, C),
        # so the batch logic can be reused as is.
        return self.apply_to_images(volume, holes, seed, **params)

    def apply_to_volumes(self, volumes: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Cut the same holes out of a batch of volumes.

        Args:
            volumes (np.ndarray): Batch of volumes; a 5-D input is read as having its
                channels on the last axis, anything else is treated as single-channel.
            holes (np.ndarray): Regions to drop; an empty array leaves the batch untouched.
            seed (int): Seed for the generator handed to ``cutout_on_volumes``.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: The volumes with the holes filled using ``self.fill``.

        Raises:
            ValueError: If an inpainting fill is selected and the channel count is not 1 or 3.

        """
        if holes.size == 0:
            return volumes
        if self._uses_inpainting():
            self._check_inpaint_channels(volumes.shape[4] if volumes.ndim == 5 else 1)
        return cutout_on_volumes(volumes, holes, self.fill, np.random.default_rng(seed))

    def apply_to_mask3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the holes in a 3D mask with ``fill_mask``; no-op when ``fill_mask`` is None or there are no holes."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volume(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_masks3d(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the holes in a batch of 3D masks with ``fill_mask``; no-op when ``fill_mask`` is None or there are no holes."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout_on_volumes(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_mask(self, mask: np.ndarray, holes: np.ndarray, seed: int, **params: Any) -> np.ndarray:
        """Fill the holes in a 2D mask with ``fill_mask``; no-op when ``fill_mask`` is None or there are no holes."""
        if self.fill_mask is None or holes.size == 0:
            return mask
        return cutout(mask, holes, self.fill_mask, np.random.default_rng(seed))

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter bounding boxes against the dropped regions.

        Args:
            bboxes (np.ndarray): Normalized bounding boxes.
            holes (np.ndarray): Regions that were dropped.
            **params (Any): Must contain ``"shape"``; its first two entries are used
                as the image shape.

        Returns:
            np.ndarray: Boxes kept by ``filter_bboxes_by_holes`` under the bbox
                processor's ``min_area`` and ``min_visibility``, re-normalized. The
                input is returned unchanged when there are no holes or no bbox
                processor is configured.

        """
        if holes.size == 0:
            return bboxes

        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]
        # The hole filter works in pixel coordinates, so convert there and back.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)
        filtered_bboxes = filter_bboxes_by_holes(
            denormalized_bboxes,
            holes,
            image_shape,
            min_area=processor.params.min_area,
            min_visibility=processor.params.min_visibility,
        )
        return normalize_bboxes(filtered_bboxes, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        holes: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Filter keypoints against the dropped regions.

        Args:
            keypoints (np.ndarray): Keypoints to filter.
            holes (np.ndarray): Regions that were dropped.
            **params (Any): Additional parameters (unused).

        Returns:
            np.ndarray: Result of ``filter_keypoints_in_holes``. The input is returned
                unchanged when there are no holes, no keypoint processor is
                configured, or the processor's ``remove_invisible`` is falsy.

        """
        if holes.size == 0:
            return keypoints
        processor = cast("KeypointsProcessor", self.get_processor("keypoints"))

        if processor is None or not processor.params.remove_invisible:
            return keypoints

        return filter_keypoints_in_holes(keypoints, holes)

    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
        """Produce the ``holes`` and ``seed`` parameters; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.

        """
        raise NotImplementedError("Subclasses must implement this method.")
class PixelDropout(DualTransform):
    """Drops random pixels from the image.

    This transform randomly sets pixels in the image to a specified value, effectively "dropping out" those pixels.
    It can be applied to both the image and its corresponding mask.

    Args:
        dropout_prob (float): Probability of dropping out each pixel. Should be in the range [0, 1].
            Default: 0.01

        per_channel (bool): If True, the dropout mask will be generated independently for each channel.
            If False, the same dropout mask will be applied to all channels.
            Default: False

        drop_value (float | tuple[float, ...] | None): Value to assign to the dropped pixels.
            If None, the value will be randomly sampled for each application:
                - For uint8 images: Random integer in [0, 255]
                - For float32 images: Random float in [0, 1]
            If a single number, that value will be used for all dropped pixels.
            If a sequence, it should contain one value per channel.
            Default: 0

        mask_drop_value (float | tuple[float, ...] | None): Value to assign to dropped pixels in the mask.
            If None, the mask will remain unchanged.
            If a single number, that value will be used for all dropped pixels in the mask.
            If a sequence, it should contain one value per channel.
            Default: None

        p (float): Probability of applying the transform. Should be in the range [0, 1].
            Default: 0.5

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - Bounding boxes are filtered against the dropout mask, using the bbox processor's
          ``min_area`` and ``min_visibility``, only when ``per_channel`` is False and a bbox
          processor is configured. Otherwise they are returned unchanged.
        - Keypoints are always returned unchanged by this transform.
        - The mask gets its own, independently sampled dropout mask; it is not the one
          applied to the image.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> transform = A.PixelDropout(dropout_prob=0.1, per_channel=True, p=1.0)
        >>> result = transform(image=image, mask=mask)
        >>> dropped_image, dropped_mask = result['image'], result['mask']

    """

    class InitSchema(BaseTransformInitSchema):
        dropout_prob: float = Field(ge=0, le=1)
        per_channel: bool
        drop_value: tuple[float, ...] | float | None
        mask_drop_value: tuple[float, ...] | float | None

    _targets = ALL_TARGETS

    def __init__(
        self,
        dropout_prob: float = 0.01,
        per_channel: bool = False,
        drop_value: tuple[float, ...] | float | None = 0,
        mask_drop_value: tuple[float, ...] | float | None = None,
        p: float = 0.5,
    ):
        super().__init__(p=p)
        self.dropout_prob = dropout_prob
        self.per_channel = per_channel
        self.drop_value = drop_value
        self.mask_drop_value = mask_drop_value

    def apply(
        self,
        img: np.ndarray,
        drop_mask: np.ndarray,
        drop_values: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the image.

        Args:
            img (np.ndarray): The image to apply the transform to.
            drop_mask (np.ndarray): The dropout mask.
            drop_values (np.ndarray): The values to assign to the dropped pixels.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed image.

        """
        return fpixel.pixel_dropout(img, drop_mask, drop_values)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        mask_drop_mask: np.ndarray | None,
        mask_drop_values: float | np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the mask.

        Args:
            mask (np.ndarray): The mask to apply the transform to.
            mask_drop_mask (np.ndarray | None): The dropout mask for the mask, or None
                when no mask dropout parameters were generated.
            mask_drop_values (float | np.ndarray | None): The values to assign to the
                dropped pixels in the mask.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The transformed mask, or the input mask unchanged when
                ``mask_drop_value`` is None or no dropout mask is available.

        """
        # get_params_dependent_on_data may emit None for mask_drop_mask, so guard
        # against it instead of forwarding None to pixel_dropout.
        if self.mask_drop_value is None or mask_drop_mask is None:
            return mask

        return fpixel.pixel_dropout(mask, mask_drop_mask, mask_drop_values)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        drop_mask: np.ndarray | None,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the bounding boxes.

        Args:
            bboxes (np.ndarray): The bounding boxes to apply the transform to.
            drop_mask (np.ndarray | None): The dropout mask for the bounding boxes.
            **params (Any): Additional parameters for the transform; must contain
                ``"shape"``, whose first two entries are used as the image shape.

        Returns:
            np.ndarray: The transformed bounding boxes. The input is returned unchanged
                when there is no dropout mask, when ``per_channel`` is True, or when no
                bbox processor is configured.

        """
        # Per-channel dropout masks are not used for box filtering: boxes pass through.
        if drop_mask is None or self.per_channel:
            return bboxes

        processor = cast("BboxProcessor", self.get_processor("bboxes"))
        if processor is None:
            return bboxes

        image_shape = params["shape"][:2]
        # The mask-based filter works in pixel coordinates, so convert there and back.
        denormalized_bboxes = denormalize_bboxes(bboxes, image_shape)

        result = fdropout.mask_dropout_bboxes(
            denormalized_bboxes,
            drop_mask,
            image_shape,
            processor.params.min_area,
            processor.params.min_visibility,
        )

        return normalize_bboxes(result, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        **params: Any,
    ) -> np.ndarray:
        """Apply pixel dropout to the keypoints.

        Args:
            keypoints (np.ndarray): The keypoints to apply the transform to.
            **params (Any): Additional parameters for the transform.

        Returns:
            np.ndarray: The keypoints, unchanged.

        """
        return keypoints

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate parameters for pixel dropout based on input data.

        Args:
            params (dict[str, Any]): Transform parameters
            data (dict[str, Any]): Input data dictionary; the reference array is
                ``data["image"]`` when present, otherwise ``data["images"][0]``.

        Returns:
            dict[str, Any]: Dictionary with ``drop_mask`` and ``drop_values`` for the
                image, plus ``mask_drop_mask`` and ``mask_drop_values`` for the mask.
                The two mask entries are None unless ``mask_drop_value`` is set and a
                mask array is found in ``data``.

        """
        reference_array = data["image"] if "image" in data else data["images"][0]

        # Generate drop mask and values for all targets
        drop_mask = fpixel.get_drop_mask(
            reference_array.shape,
            self.per_channel,
            self.dropout_prob,
            self.random_generator,
        )
        drop_values = fpixel.prepare_drop_values(
            reference_array,
            self.drop_value,
            self.random_generator,
        )

        # Handle mask drop values if specified
        mask_drop_mask = None
        mask_drop_values = None
        mask = fpixel.get_mask_array(data)
        if self.mask_drop_value is not None and mask is not None:
            # Sampled separately from the image's drop mask, using the mask's own shape.
            mask_drop_mask = fpixel.get_drop_mask(
                mask.shape,
                self.per_channel,
                self.dropout_prob,
                self.random_generator,
            )
            mask_drop_values = fpixel.prepare_drop_values(
                mask,
                self.mask_drop_value,
                self.random_generator,
            )

        return {
            "drop_mask": drop_mask,
            "drop_values": drop_values,
            "mask_drop_mask": mask_drop_mask,
            "mask_drop_values": mask_drop_values,
        }
|