| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956 |
- """Transforms for resizing images and associated data.
- This module provides transform classes for resizing operations, including uniform resizing,
- scaling with aspect ratio preservation, and size-constrained transformations.
- """
- from __future__ import annotations
- from collections.abc import Sequence
- from typing import Any, Literal, cast
- import cv2
- import numpy as np
- from albucore import batch_transform
- from pydantic import Field, field_validator, model_validator
- from typing_extensions import Self
- from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
- from albumentations.core.type_definitions import ALL_TARGETS
- from albumentations.core.utils import to_tuple
- from . import functional as fgeometric
- __all__ = ["LongestMaxSize", "RandomScale", "Resize", "SmallestMaxSize"]
- class RandomScale(DualTransform):
- """Randomly resize the input. Output image size is different from the input image size.
- Args:
- scale_limit (float or tuple[float, float]): scaling factor range. If scale_limit is a single float value, the
- range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
- If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
- Default: (-0.1, 0.1).
- interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
- cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_LINEAR.
- mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
- Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_NEAREST.
- area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
- for downscaling. Options:
- - None: No automatic interpolation selection, always use the specified interpolation method
- - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
- - "image_mask": Use INTER_AREA when downscaling both images and masks
- Default: None.
- p (float): probability of applying the transform. Default: 0.5.
- Targets:
- image, mask, bboxes, keypoints, volume, mask3d
- Image types:
- uint8, float32
- Note:
- - The output image size is different from the input image size.
- - Scale factor is sampled independently per image side (width and height).
- - Bounding box coordinates are scaled accordingly.
- - Keypoint coordinates are scaled accordingly.
- - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
- downscaling (scale < 1.0), which provides better quality for size reduction.
- Mathematical formulation:
- Let (W, H) be the original image dimensions and (W', H') be the output dimensions.
- The scale factor s is sampled from the range [1 + scale_limit[0], 1 + scale_limit[1]].
- Then, W' = W * s and H' = H * s.
- Examples:
- >>> import numpy as np
- >>> import albumentations as A
- >>> import cv2
- >>>
- >>> # Create sample data for demonstration
- >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
- >>> # Add some shapes to visualize scaling effects
- >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1) # Red square
- >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1) # Green circle
- >>>
- >>> # Create a mask for segmentation
- >>> mask = np.zeros((100, 100), dtype=np.uint8)
- >>> mask[25:75, 25:75] = 1 # Mask covering the red square
- >>>
- >>> # Create bounding boxes and keypoints
- >>> bboxes = np.array([[25, 25, 75, 75]]) # Box around the red square
- >>> bbox_labels = [1]
- >>> keypoints = np.array([[50, 50]]) # Center of circle
- >>> keypoint_labels = [0]
- >>>
- >>> # Apply RandomScale transform with comprehensive parameters
- >>> transform = A.Compose([
- ... A.RandomScale(
- ... scale_limit=(-0.3, 0.5), # Scale between 0.7x and 1.5x
- ... interpolation=cv2.INTER_LINEAR,
- ... mask_interpolation=cv2.INTER_NEAREST,
- ... area_for_downscale="image", # Use INTER_AREA for image downscaling
- ... p=1.0 # Always apply
- ... )
- ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
- ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
- >>>
- >>> # Apply the transform to all targets
- >>> result = transform(
- ... image=image,
- ... mask=mask,
- ... bboxes=bboxes,
- ... bbox_labels=bbox_labels,
- ... keypoints=keypoints,
- ... keypoint_labels=keypoint_labels
- ... )
- >>>
- >>> # Get the transformed results
- >>> scaled_image = result['image'] # Dimensions will be between 70-150 pixels
- >>> scaled_mask = result['mask'] # Mask scaled proportionally to image
- >>> scaled_bboxes = result['bboxes'] # Bounding boxes adjusted to new dimensions
- >>> scaled_bbox_labels = result['bbox_labels'] # Labels remain unchanged
- >>> scaled_keypoints = result['keypoints'] # Keypoints adjusted to new dimensions
- >>> scaled_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
- >>>
- >>> # The image dimensions will vary based on the randomly sampled scale factor
- >>> # With scale_limit=(-0.3, 0.5), dimensions could be anywhere from 70% to 150% of original
- """
- _targets = ALL_TARGETS
- class InitSchema(BaseTransformInitSchema):
- scale_limit: tuple[float, float] | float
- area_for_downscale: Literal[None, "image", "image_mask"]
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- @field_validator("scale_limit")
- @classmethod
- def _check_scale_limit(cls, v: tuple[float, float] | float) -> tuple[float, float]:
- return to_tuple(v)
- def __init__(
- self,
- scale_limit: tuple[float, float] | float = (-0.1, 0.1),
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_LINEAR,
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_NEAREST,
- area_for_downscale: Literal[None, "image", "image_mask"] = None,
- p: float = 0.5,
- ):
- super().__init__(p=p)
- self.scale_limit = cast("tuple[float, float]", scale_limit)
- self.interpolation = interpolation
- self.mask_interpolation = mask_interpolation
- self.area_for_downscale = area_for_downscale
- def get_params(self) -> dict[str, float]:
- """Get parameters for the transform.
- Returns:
- dict[str, float]: Dictionary with parameters.
- """
- return {"scale": self.py_random.uniform(*self.scale_limit) + 1.0}
- def apply(
- self,
- img: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- """Apply scaling to the image.
- Args:
- img (np.ndarray): Image to scale.
- scale (float): Scaling factor.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Scaled image.
- """
- interpolation = self.interpolation
- if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
- interpolation = cv2.INTER_AREA
- return fgeometric.scale(img, scale, interpolation)
- def apply_to_mask(
- self,
- mask: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- """Apply scaling to the mask.
- Args:
- mask (np.ndarray): Mask to scale.
- scale (float): Scaling factor.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Scaled mask.
- """
- interpolation = self.mask_interpolation
- if self.area_for_downscale == "image_mask" and scale < 1.0:
- interpolation = cv2.INTER_AREA
- return fgeometric.scale(mask, scale, interpolation)
- def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
- """Apply the transform to bounding boxes.
- Args:
- bboxes (np.ndarray): Bounding boxes to transform.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Transformed bounding boxes which are scale invariant.
- """
- # Bounding box coordinates are scale invariant
- return bboxes
- def apply_to_keypoints(
- self,
- keypoints: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- """Apply scaling to keypoints.
- Args:
- keypoints (np.ndarray): Keypoints to scale.
- scale (float): Scaling factor.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Scaled keypoints.
- """
- return fgeometric.keypoints_scale(keypoints, scale, scale)
- class MaxSizeTransform(DualTransform):
- """Base class for transforms that resize based on maximum size constraints.
- This class provides common functionality for derived transforms like LongestMaxSize and
- SmallestMaxSize that resize images based on size constraints while preserving aspect ratio.
- Args:
- max_size (int, Sequence[int], optional): Maximum size constraint. The specific interpretation
- depends on the derived class. Default: None.
- max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints.
- Either max_size or max_size_hw must be specified, but not both. Default: None.
- interpolation (OpenCV flag): Flag for the interpolation algorithm. Should be one of:
- cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_LINEAR.
- mask_interpolation (OpenCV flag): Flag for the mask interpolation algorithm.
- Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_NEAREST.
- area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
- for downscaling. Options:
- - None: No automatic interpolation selection, always use the specified interpolation method
- - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
- - "image_mask": Use INTER_AREA when downscaling both images and masks
- Default: None.
- p (float): Probability of applying the transform. Default: 1.
- Targets:
- image, mask, bboxes, keypoints, volume, mask3d
- Image types:
- uint8, float32
- Note:
- - This is a base class that should be extended by concrete resize transforms.
- - The scaling calculation is implemented in derived classes.
- - Aspect ratio is preserved by applying the same scale factor to both dimensions.
- - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
- downscaling (scale < 1.0), which provides better quality for size reduction.
- Examples:
- >>> import numpy as np
- >>> import albumentations as A
- >>> import cv2
- >>>
- >>> # Example of creating a custom transform that extends MaxSizeTransform
- >>> class CustomMaxSize(A.MaxSizeTransform):
- ... def get_params_dependent_on_data(self, params, data):
- ... img_h, img_w = params["shape"][:2]
- ... # Calculate scale factor - here we scale to make the image area constant
- ... target_area = 300 * 300 # Target area of 300x300
- ... current_area = img_h * img_w
- ... scale = np.sqrt(target_area / current_area)
- ... return {"scale": scale}
- >>>
- >>> # Prepare sample data
- >>> image = np.zeros((100, 200, 3), dtype=np.uint8)
- >>> # Add a rectangle to visualize the effect
- >>> cv2.rectangle(image, (50, 20), (150, 80), (255, 0, 0), -1)
- >>>
- >>> # Create a mask
- >>> mask = np.zeros((100, 200), dtype=np.uint8)
- >>> mask[20:80, 50:150] = 1
- >>>
- >>> # Create bounding boxes and keypoints
- >>> bboxes = np.array([[50, 20, 150, 80]])
- >>> bbox_labels = [1]
- >>> keypoints = np.array([[100, 50]])
- >>> keypoint_labels = [0]
- >>>
- >>> # Apply the custom transform
- >>> transform = A.Compose([
- ... CustomMaxSize(
- ... max_size=None,
- ... max_size_hw=(None, None), # Not used in our custom implementation
- ... interpolation=cv2.INTER_LINEAR,
- ... mask_interpolation=cv2.INTER_NEAREST,
- ... area_for_downscale="image", # Use INTER_AREA when downscaling images
- ... p=1.0
- ... )
- ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
- ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
- >>>
- >>> # Apply the transform
- >>> result = transform(
- ... image=image,
- ... mask=mask,
- ... bboxes=bboxes,
- ... bbox_labels=bbox_labels,
- ... keypoints=keypoints,
- ... keypoint_labels=keypoint_labels
- ... )
- >>>
- >>> # Get results
- >>> transformed_image = result['image'] # Shape will be approximately (122, 245, 3)
- >>> transformed_mask = result['mask'] # Shape will be approximately (122, 245)
- >>> transformed_bboxes = result['bboxes'] # Bounding boxes are scale invariant
- >>> transformed_keypoints = result['keypoints'] # Keypoints scaled proportionally
- >>> transformed_bbox_labels = result['bbox_labels'] # Labels remain unchanged
- >>> transformed_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
- """
- _targets = ALL_TARGETS
- class InitSchema(BaseTransformInitSchema):
- max_size: int | list[int] | None
- max_size_hw: tuple[int | None, int | None] | None
- area_for_downscale: Literal[None, "image", "image_mask"]
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- @model_validator(mode="after")
- def validate_size_parameters(self) -> Self:
- if self.max_size is None and self.max_size_hw is None:
- raise ValueError("Either max_size or max_size_hw must be specified")
- if self.max_size is not None and self.max_size_hw is not None:
- raise ValueError("Only one of max_size or max_size_hw should be specified")
- return self
- def __init__(
- self,
- max_size: int | Sequence[int] | None = None,
- max_size_hw: tuple[int | None, int | None] | None = None,
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_LINEAR,
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_NEAREST,
- area_for_downscale: Literal[None, "image", "image_mask"] = None,
- p: float = 1,
- ):
- super().__init__(p=p)
- self.max_size = max_size
- self.max_size_hw = max_size_hw
- self.interpolation = interpolation
- self.mask_interpolation = mask_interpolation
- self.area_for_downscale = area_for_downscale
- def apply(
- self,
- img: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- height, width = img.shape[:2]
- new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
- interpolation = self.interpolation
- if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
- interpolation = cv2.INTER_AREA
- return fgeometric.resize(img, (new_height, new_width), interpolation=interpolation)
- def apply_to_mask(
- self,
- mask: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- height, width = mask.shape[:2]
- new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
- interpolation = self.mask_interpolation
- if self.area_for_downscale == "image_mask" and scale < 1.0:
- interpolation = cv2.INTER_AREA
- return fgeometric.resize(mask, (new_height, new_width), interpolation=interpolation)
- def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
- # Bounding box coordinates are scale invariant
- return bboxes
- def apply_to_keypoints(
- self,
- keypoints: np.ndarray,
- scale: float,
- **params: Any,
- ) -> np.ndarray:
- return fgeometric.keypoints_scale(keypoints, scale, scale)
- @batch_transform("spatial", has_batch_dim=True, has_depth_dim=False)
- def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
- return self.apply(images, *args, **params)
- @batch_transform("spatial", has_batch_dim=False, has_depth_dim=True)
- def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
- return self.apply(volume, *args, **params)
- @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
- def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
- return self.apply(volumes, *args, **params)
- @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
- def apply_to_mask3d(self, mask3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
- return self.apply_to_mask(mask3d, *args, **params)
- @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
- def apply_to_masks3d(self, masks3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
- return self.apply_to_mask(masks3d, *args, **params)
- class LongestMaxSize(MaxSizeTransform):
- """Rescale an image so that the longest side is equal to max_size or sides meet max_size_hw constraints,
- keeping the aspect ratio.
- Args:
- max_size (int, Sequence[int], optional): Maximum size of the longest side after the transformation.
- When using a list or tuple, the max size will be randomly selected from the values provided. Default: None.
- max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
- - (height, width): Both dimensions must fit within these bounds
- - (height, None): Only height is constrained, width scales proportionally
- - (None, width): Only width is constrained, height scales proportionally
- If specified, max_size must be None. Default: None.
- interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
- mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
- Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_NEAREST.
- area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
- for downscaling. Options:
- - None: No automatic interpolation selection, always use the specified interpolation method
- - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
- - "image_mask": Use INTER_AREA when downscaling both images and masks
- Default: None.
- p (float): probability of applying the transform. Default: 1.
- Targets:
- image, mask, bboxes, keypoints, volume, mask3d
- Image types:
- uint8, float32
- Note:
- - If the longest side of the image is already equal to max_size, the image will not be resized.
- - This transform will not crop the image. The resulting image may be smaller than specified in both dimensions.
- - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
- - Bounding boxes and keypoints are scaled accordingly.
- - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
- Mathematical Details:
- Let (W, H) be the original width and height of the image.
- When using max_size:
- 1. The scaling factor s is calculated as:
- s = max_size / max(W, H)
- 2. The new dimensions (W', H') are:
- W' = W * s
- H' = H * s
- When using max_size_hw=(H_target, W_target):
- 1. For both dimensions specified:
- s = min(H_target/H, W_target/W)
- This ensures both dimensions fit within the specified bounds.
- 2. For height only (W_target=None):
- s = H_target/H
- Width will scale proportionally.
- 3. For width only (H_target=None):
- s = W_target/W
- Height will scale proportionally.
- 4. The new dimensions (W', H') are:
- W' = W * s
- H' = H * s
- Examples:
- >>> import albumentations as A
- >>> import cv2
- >>> # Using max_size
- >>> transform1 = A.LongestMaxSize(max_size=1024, area_for_downscale="image")
- >>> # Input image (1500, 800) -> Output (1024, 546)
- >>>
- >>> # Using max_size_hw with both dimensions
- >>> transform2 = A.LongestMaxSize(max_size_hw=(800, 1024), area_for_downscale="image_mask")
- >>> # Input (1500, 800) -> Output (800, 427)
- >>> # Input (800, 1500) -> Output (546, 1024)
- >>>
- >>> # Using max_size_hw with only height
- >>> transform3 = A.LongestMaxSize(max_size_hw=(800, None))
- >>> # Input (1500, 800) -> Output (800, 427)
- >>>
- >>> # Common use case with padding
- >>> transform4 = A.Compose([
- ... A.LongestMaxSize(max_size=1024, area_for_downscale="image"),
- ... A.PadIfNeeded(min_height=1024, min_width=1024),
- ... ])
- """
- def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
- """Calculate parameters that depend on the input data.
- Args:
- params (dict[str, Any]): Parameters dictionary.
- data (dict[str, Any]): Dictionary containing input data.
- Returns:
- dict[str, Any]: Dictionary with parameters calculated based on input data.
- """
- img_h, img_w = params["shape"][:2]
- if self.max_size is not None:
- if isinstance(self.max_size, (list, tuple)):
- max_size = self.py_random.choice(self.max_size)
- else:
- max_size = self.max_size
- scale = max_size / max(img_h, img_w)
- elif self.max_size_hw is not None:
- # We know max_size_hw is not None here due to model validator
- max_h, max_w = self.max_size_hw
- if max_h is not None and max_w is not None:
- # Scale based on longest side to maintain aspect ratio
- h_scale = max_h / img_h
- w_scale = max_w / img_w
- scale = min(h_scale, w_scale)
- elif max_h is not None:
- # Only height specified
- scale = max_h / img_h
- else:
- # Only width specified
- scale = max_w / img_w
- return {"scale": scale}
- class SmallestMaxSize(MaxSizeTransform):
- """Rescale an image so that minimum side is equal to max_size or sides meet max_size_hw constraints,
- keeping the aspect ratio.
- Args:
- max_size (int, list of int, optional): Maximum size of smallest side of the image after the transformation.
- When using a list, max size will be randomly selected from the values in the list. Default: None.
- max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
- - (height, width): Both dimensions must be at least these values
- - (height, None): Only height is constrained, width scales proportionally
- - (None, width): Only width is constrained, height scales proportionally
- If specified, max_size must be None. Default: None.
- interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
- cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_LINEAR.
- mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
- Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_NEAREST.
- area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
- for downscaling. Options:
- - None: No automatic interpolation selection, always use the specified interpolation method
- - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
- - "image_mask": Use INTER_AREA when downscaling both images and masks
- Default: None.
- p (float): Probability of applying the transform. Default: 1.
- Targets:
- image, mask, bboxes, keypoints, volume, mask3d
- Image types:
- uint8, float32
- Note:
- - If the smallest side of the image is already equal to max_size, the image will not be resized.
- - This transform will not crop the image. The resulting image may be larger than specified in both dimensions.
- - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
- - Bounding boxes and keypoints are scaled accordingly.
- - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
- Mathematical Details:
- Let (W, H) be the original width and height of the image.
- When using max_size:
- 1. The scaling factor s is calculated as:
- s = max_size / min(W, H)
- 2. The new dimensions (W', H') are:
- W' = W * s
- H' = H * s
- When using max_size_hw=(H_target, W_target):
- 1. For both dimensions specified:
- s = max(H_target/H, W_target/W)
- This ensures both dimensions are at least as large as specified.
- 2. For height only (W_target=None):
- s = H_target/H
- Width will scale proportionally.
- 3. For width only (H_target=None):
- s = W_target/W
- Height will scale proportionally.
- 4. The new dimensions (W', H') are:
- W' = W * s
- H' = H * s
- Examples:
- >>> import numpy as np
- >>> import albumentations as A
- >>> # Using max_size
- >>> transform1 = A.SmallestMaxSize(max_size=120, area_for_downscale="image")
- >>> # Input image (100, 150) -> Output (120, 180)
- >>>
- >>> # Using max_size_hw with both dimensions
- >>> transform2 = A.SmallestMaxSize(max_size_hw=(100, 200), area_for_downscale="image_mask")
- >>> # Input (80, 160) -> Output (100, 200)
- >>> # Input (160, 80) -> Output (400, 200)
- >>>
- >>> # Using max_size_hw with only height
- >>> transform3 = A.SmallestMaxSize(max_size_hw=(100, None))
- >>> # Input (80, 160) -> Output (100, 200)
- """
- def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
- """Calculate parameters that depend on the input data.
- Args:
- params (dict[str, Any]): Parameters dictionary.
- data (dict[str, Any]): Dictionary containing input data.
- Returns:
- dict[str, Any]: Dictionary with parameters calculated based on input data.
- """
- img_h, img_w = params["shape"][:2]
- if self.max_size is not None:
- if isinstance(self.max_size, (list, tuple)):
- max_size = self.py_random.choice(self.max_size)
- else:
- max_size = self.max_size
- scale = max_size / min(img_h, img_w)
- elif self.max_size_hw is not None:
- max_h, max_w = self.max_size_hw
- if max_h is not None and max_w is not None:
- # Scale based on smallest side to maintain aspect ratio
- h_scale = max_h / img_h
- w_scale = max_w / img_w
- scale = max(h_scale, w_scale)
- elif max_h is not None:
- # Only height specified
- scale = max_h / img_h
- else:
- # Only width specified
- scale = max_w / img_w
- return {"scale": scale}
- class Resize(DualTransform):
- """Resize the input to the given height and width.
- Args:
- height (int): desired height of the output.
- width (int): desired width of the output.
- interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
- cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_LINEAR.
- mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
- Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
- Default: cv2.INTER_NEAREST.
- area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
- for downscaling. Options:
- - None: No automatic interpolation selection, always use the specified interpolation method
- - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
- - "image_mask": Use INTER_AREA when downscaling both images and masks
- Default: None.
- p (float): probability of applying the transform. Default: 1.
- Targets:
- image, mask, bboxes, keypoints, volume, mask3d
- Image types:
- uint8, float32
- Examples:
- >>> import numpy as np
- >>> import albumentations as A
- >>> import cv2
- >>>
- >>> # Create sample data for demonstration
- >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
- >>> # Add some shapes to visualize resize effects
- >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1) # Red square
- >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1) # Green circle
- >>>
- >>> # Create a mask for segmentation
- >>> mask = np.zeros((100, 100), dtype=np.uint8)
- >>> mask[25:75, 25:75] = 1 # Mask covering the red square
- >>>
- >>> # Create bounding boxes and keypoints
- >>> bboxes = np.array([[25, 25, 75, 75]]) # Box around the red square
- >>> bbox_labels = [1]
- >>> keypoints = np.array([[50, 50]]) # Center of circle
- >>> keypoint_labels = [0]
- >>>
- >>> # Resize all data to 224x224 (common input size for many CNNs)
- >>> transform = A.Compose([
- ... A.Resize(
- ... height=224,
- ... width=224,
- ... interpolation=cv2.INTER_LINEAR,
- ... mask_interpolation=cv2.INTER_NEAREST,
- ... area_for_downscale="image", # Use INTER_AREA when downscaling images
- ... p=1.0
- ... )
- ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
- ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
- >>>
- >>> # Apply the transform to all targets
- >>> result = transform(
- ... image=image,
- ... mask=mask,
- ... bboxes=bboxes,
- ... bbox_labels=bbox_labels,
- ... keypoints=keypoints,
- ... keypoint_labels=keypoint_labels
- ... )
- >>>
- >>> # Get the transformed results
- >>> resized_image = result['image'] # Shape will be (224, 224, 3)
- >>> resized_mask = result['mask'] # Shape will be (224, 224)
- >>> resized_bboxes = result['bboxes'] # Bounding boxes scaled to new dimensions
- >>> resized_bbox_labels = result['bbox_labels'] # Labels remain unchanged
- >>> resized_keypoints = result['keypoints'] # Keypoints scaled to new dimensions
- >>> resized_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
- >>>
- >>> # Note: When resizing from 100x100 to 224x224:
- >>> # - The red square will be scaled from (25-75) to approximately (56-168)
- >>> # - The keypoint at (50, 50) will move to approximately (112, 112)
- >>> # - All spatial relationships are preserved but coordinates are scaled
- """
- _targets = ALL_TARGETS
- class InitSchema(BaseTransformInitSchema):
- height: int = Field(ge=1)
- width: int = Field(ge=1)
- area_for_downscale: Literal[None, "image", "image_mask"]
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ]
- def __init__(
- self,
- height: int,
- width: int,
- interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_LINEAR,
- mask_interpolation: Literal[
- cv2.INTER_NEAREST,
- cv2.INTER_NEAREST_EXACT,
- cv2.INTER_LINEAR,
- cv2.INTER_CUBIC,
- cv2.INTER_AREA,
- cv2.INTER_LANCZOS4,
- cv2.INTER_LINEAR_EXACT,
- ] = cv2.INTER_NEAREST,
- area_for_downscale: Literal[None, "image", "image_mask"] = None,
- p: float = 1,
- ):
- super().__init__(p=p)
- self.height = height
- self.width = width
- self.interpolation = interpolation
- self.mask_interpolation = mask_interpolation
- self.area_for_downscale = area_for_downscale
- def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
- """Apply resizing to the image.
- Args:
- img (np.ndarray): Image to resize.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Resized image.
- """
- height, width = img.shape[:2]
- is_downscale = (self.height < height) or (self.width < width)
- interpolation = self.interpolation
- if self.area_for_downscale in ["image", "image_mask"] and is_downscale:
- interpolation = cv2.INTER_AREA
- return fgeometric.resize(img, (self.height, self.width), interpolation=interpolation)
- def apply_to_mask(self, mask: np.ndarray, **params: Any) -> np.ndarray:
- """Apply resizing to the mask.
- Args:
- mask (np.ndarray): Mask to resize.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Resized mask.
- """
- height, width = mask.shape[:2]
- is_downscale = (self.height < height) or (self.width < width)
- interpolation = self.mask_interpolation
- if self.area_for_downscale == "image_mask" and is_downscale:
- interpolation = cv2.INTER_AREA
- return fgeometric.resize(mask, (self.height, self.width), interpolation=interpolation)
- def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
- """Apply the transform to bounding boxes.
- Args:
- bboxes (np.ndarray): Bounding boxes to transform.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Transformed bounding boxes which are scale invariant.
- """
- # Bounding box coordinates are scale invariant
- return bboxes
- def apply_to_keypoints(self, keypoints: np.ndarray, **params: Any) -> np.ndarray:
- """Apply resizing to keypoints.
- Args:
- keypoints (np.ndarray): Keypoints to resize.
- **params (Any): Additional parameters.
- Returns:
- np.ndarray: Resized keypoints.
- """
- height, width = params["shape"][:2]
- scale_x = self.width / width
- scale_y = self.height / height
- return fgeometric.keypoints_scale(keypoints, scale_x, scale_y)
|