| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850 |
- """Transforms for rotating images and associated data.
- This module provides classes for rotating images, masks, bounding boxes, and keypoints.
- Includes transforms for 90-degree rotations and arbitrary angle rotations with various
- border handling options.
- """
- from __future__ import annotations
- import math
- from typing import Any, cast
- import cv2
- import numpy as np
- from typing_extensions import Literal
- from albumentations.augmentations.crops import functional as fcrops
- from albumentations.augmentations.geometric.transforms import Affine
- from albumentations.core.pydantic import SymmetricRangeType
- from albumentations.core.transforms_interface import (
- BaseTransformInitSchema,
- DualTransform,
- )
- from albumentations.core.type_definitions import ALL_TARGETS
- from . import functional as fgeometric
# Names exported by ``from <this module> import *``.
__all__ = [
    "RandomRotate90",
    "Rotate",
    "SafeRotate",
]

# Tolerance below which |sin(angle)| and |cos(angle)| are treated as equal
# when computing the border crop in ``Rotate._rotated_rect_with_max_area``.
SMALL_NUMBER = 1e-10
class RandomRotate90(DualTransform):
    """Rotate the input by a randomly chosen multiple of 90 degrees.

    The number of quarter turns is sampled from {0, 1, 2, 3}. Because 0 is one
    of the candidates, the transform is the identity a quarter of the time it
    fires, even with p=1.0:
        - probability p * 1/4: 0 degrees (no rotation)
        - probability p * 1/4: 90 degrees
        - probability p * 1/4: 180 degrees
        - probability p * 1/4: 270 degrees

    For example:
        - p=1.0: each angle (0° included) is picked with probability 0.25
        - p=0.8: each angle is picked with probability 0.2, and the transform
          is skipped with probability 0.2
        - p=0.5: each angle is picked with probability 0.125, and the transform
          is skipped with probability 0.5

    Common applications:
        - Aerial/satellite imagery: objects can appear in any orientation
        - Medical imaging: scans/slides may not have a consistent orientation
        - Document analysis: pages or symbols might be rotated
        - Microscopy: cell orientation is often arbitrary
        - Game development: sprites/textures that should work in multiple orientations

    Not recommended for:
        - Natural scene images where gravity matters (e.g., landscape photography)
        - Face detection/recognition tasks
        - Text recognition (unless text can appear rotated)
        - Tasks where object orientation is important for classification

    Note:
        If your domain has both 90-degree rotation AND flip symmetries
        (e.g., satellite imagery, microscopy), consider using the `D4`
        transform instead. `D4` is more efficient and mathematically correct
        as it:
        - Samples uniformly from all 8 possible combinations of rotations and flips
        - Properly represents the dihedral group D4 symmetries
        - Avoids potential correlation between separate rotation and flip augmentations

    Args:
        p (float): Probability of applying the transform. Default: 1.0.
            Even with p=1.0 there is still a 0.25 probability of drawing a
            0-degree rotation (identity transform).

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.RandomRotate90(p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    def __init__(
        self,
        p: float = 1,
    ):
        super().__init__(p=p)

    def get_params(self) -> dict[str, int]:
        """Sample the number of quarter turns for this call.

        Returns:
            dict[str, int]: ``{"factor": k}`` where ``k`` is an integer in [0, 3].

        """
        # randint is inclusive on both ends, so 3 (270 degrees) is reachable.
        quarter_turns = self.py_random.randint(0, 3)
        return {"factor": quarter_turns}

    def apply(self, img: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate an image by ``factor`` quarter turns.

        Args:
            img (np.ndarray): Image to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated image.

        """
        rotated = fgeometric.rot90(img, factor)
        return rotated

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Rotate bounding boxes by ``factor`` quarter turns.

        Args:
            bboxes (np.ndarray): Bounding boxes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated bounding boxes.

        """
        rotated = fgeometric.bboxes_rot90(bboxes, factor)
        return rotated

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        factor: Literal[0, 1, 2, 3],
        **params: Any,
    ) -> np.ndarray:
        """Rotate keypoints by ``factor`` quarter turns.

        Args:
            keypoints (np.ndarray): Keypoints to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters; ``params["shape"]`` is
                forwarded to the keypoint rotation helper.

        Returns:
            np.ndarray: Rotated keypoints.

        """
        image_shape = params["shape"]
        return fgeometric.keypoints_rot90(keypoints, factor, image_shape)

    def apply_to_volume(self, volume: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a single volume by ``factor`` quarter turns.

        Args:
            volume (np.ndarray): Volume to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated volume.

        """
        rotated = fgeometric.volume_rot90(volume, factor)
        return rotated

    def apply_to_volumes(self, volumes: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a batch of volumes by ``factor`` quarter turns.

        Args:
            volumes (np.ndarray): Volumes to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated volumes.

        """
        rotated = fgeometric.volumes_rot90(volumes, factor)
        return rotated

    def apply_to_mask3d(self, mask3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a single 3D mask by ``factor`` quarter turns.

        Uses the same helper as :meth:`apply_to_volume`.

        Args:
            mask3d (np.ndarray): 3D mask to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated 3D mask.

        """
        rotated = fgeometric.volume_rot90(mask3d, factor)
        return rotated

    def apply_to_masks3d(self, masks3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
        """Rotate a batch of 3D masks by ``factor`` quarter turns.

        Uses the same helper as :meth:`apply_to_volumes`.

        Args:
            masks3d (np.ndarray): 3D masks to rotate.
            factor (Literal[0, 1, 2, 3]): Number of 90-degree rotations.
            **params (Any): Additional parameters (unused here).

        Returns:
            np.ndarray: Rotated 3D masks.

        """
        rotated = fgeometric.volumes_rot90(masks3d, factor)
        return rotated
class RotateInitSchema(BaseTransformInitSchema):
    # Constructor-argument schema shared by ``Rotate.InitSchema`` and
    # ``SafeRotate.InitSchema`` (both subclass it in this module).

    # Rotation angle range.
    limit: SymmetricRangeType

    # OpenCV interpolation flag used when warping images.
    interpolation: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
    ]

    # OpenCV interpolation flag used when warping masks.
    mask_interpolation: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
    ]

    # OpenCV pixel-extrapolation (border) flag.
    border_mode: Literal[
        cv2.BORDER_CONSTANT,
        cv2.BORDER_REPLICATE,
        cv2.BORDER_REFLECT,
        cv2.BORDER_WRAP,
        cv2.BORDER_REFLECT_101,
    ]

    # Fill values for images and masks; only the mask fill may be None here.
    fill: tuple[float, ...] | float
    fill_mask: tuple[float, ...] | float | None
class Rotate(DualTransform):
    """Rotate the input by an angle drawn uniformly at random from ``limit``.

    Args:
        limit (float | tuple[float, float]): Range from which the angle (in degrees) is sampled.
            A single float means the range (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Interpolation algorithm used for the image. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_CONSTANT
        rotate_method (Literal["largest_box", "ellipse"]): Method used to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        crop_border (bool): Whether to crop away the border after rotation. If True, the output
            image size might differ from the input. Default: False
        mask_interpolation (OpenCV flag): Interpolation algorithm used for masks.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        fill (tuple[float, ...] | float): Padding value for images if border_mode is cv2.BORDER_CONSTANT.
            Default: 0
        fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
            cv2.BORDER_CONSTANT. Default: 0
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - A new angle is sampled from 'limit' on every call.
        - With 'crop_border' False the output keeps the input size; the corners that the
          rotated image no longer covers are filled according to 'border_mode' (black
          triangles with the default constant fill of 0).
        - With 'crop_border' True the output is cropped to remove those corner regions,
          which may result in a smaller image.
        - Bounding boxes are rotated and may change size or shape.
        - Keypoints are rotated around the center of the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. The image is rotated around its center by θ degrees.
        3. The rotation matrix R is:
           R = [cos(θ)  -sin(θ)]
               [sin(θ)   cos(θ)]
        4. Each point (x, y) in the image is transformed to (x', y') by:
           [x']   [cos(θ)  -sin(θ)] [x - cx]   [cx]
           [y'] = [sin(θ)   cos(θ)] [y - cy] + [cy]
           where (cx, cy) is the center of the image.
        5. If 'crop_border' is True, the image is cropped to the largest axis-aligned
           rectangle that fits inside the rotated image.

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.Rotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        rotate_method: Literal["largest_box", "ellipse"]
        crop_border: bool
        # Re-declared so that, unlike the shared base schema, fill_mask cannot be None.
        fill: tuple[float, ...] | float
        fill_mask: tuple[float, ...] | float

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        crop_border: bool = False,
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        super().__init__(p=p)

        # Angle range; the cast only informs the type checker.
        self.limit = cast("tuple[float, float]", limit)

        # Resampling settings.
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation

        # Border handling.
        self.border_mode = border_mode
        self.fill = fill
        self.fill_mask = fill_mask

        # Bounding-box strategy and optional post-rotation crop.
        self.rotate_method = rotate_method
        self.crop_border = crop_border

    def apply(
        self,
        img: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Warp the image with the rotation matrix and optionally crop the border.

        Args:
            img (np.ndarray): Image to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left crop bound (used only if crop_border is True).
            x_max (int): Right crop bound (used only if crop_border is True).
            y_min (int): Top crop bound (used only if crop_border is True).
            y_max (int): Bottom crop bound (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` supplies the output size.

        Returns:
            np.ndarray: Transformed image.

        """
        warped = fgeometric.warp_affine(
            img,
            matrix,
            self.interpolation,
            self.fill,
            self.border_mode,
            params["shape"][:2],
        )
        if not self.crop_border:
            return warped
        return fcrops.crop(warped, x_min, y_min, x_max, y_max)

    def apply_to_mask(
        self,
        mask: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Warp the mask with the rotation matrix and optionally crop the border.

        Same as :meth:`apply`, but uses ``mask_interpolation`` and ``fill_mask``.

        Args:
            mask (np.ndarray): Mask to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left crop bound (used only if crop_border is True).
            x_max (int): Right crop bound (used only if crop_border is True).
            y_min (int): Top crop bound (used only if crop_border is True).
            y_max (int): Bottom crop bound (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` supplies the output size.

        Returns:
            np.ndarray: Transformed mask.

        """
        warped = fgeometric.warp_affine(
            mask,
            matrix,
            self.mask_interpolation,
            self.fill_mask,
            self.border_mode,
            params["shape"][:2],
        )
        if not self.crop_border:
            return warped
        return fcrops.crop(warped, x_min, y_min, x_max, y_max)

    def apply_to_bboxes(
        self,
        bboxes: np.ndarray,
        bbox_matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate bounding boxes and optionally re-express them in the cropped frame.

        Args:
            bboxes (np.ndarray): Bounding boxes to transform.
            bbox_matrix (np.ndarray): Affine transformation matrix for bounding boxes.
            x_min (int): Left crop bound (used only if crop_border is True).
            x_max (int): Right crop bound (used only if crop_border is True).
            y_min (int): Top crop bound (used only if crop_border is True).
            y_max (int): Bottom crop bound (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` supplies the image size.

        Returns:
            np.ndarray: Transformed bounding boxes.

        """
        image_shape = params["shape"][:2]
        # The image shape is passed twice: as the input shape and as the output shape.
        rotated = fgeometric.bboxes_affine(
            bboxes,
            bbox_matrix,
            self.rotate_method,
            image_shape,
            self.border_mode,
            image_shape,
        )
        if not self.crop_border:
            return rotated
        crop_coords = (x_min, y_min, x_max, y_max)
        return fcrops.crop_bboxes_by_coords(rotated, crop_coords, image_shape)

    def apply_to_keypoints(
        self,
        keypoints: np.ndarray,
        matrix: np.ndarray,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        **params: Any,
    ) -> np.ndarray:
        """Rotate keypoints and optionally shift them into the cropped frame.

        Args:
            keypoints (np.ndarray): Keypoints to transform.
            matrix (np.ndarray): Affine transformation matrix.
            x_min (int): Left crop bound (used only if crop_border is True).
            x_max (int): Right crop bound (used only if crop_border is True).
            y_min (int): Top crop bound (used only if crop_border is True).
            y_max (int): Bottom crop bound (used only if crop_border is True).
            **params (Any): Additional parameters; ``params["shape"]`` supplies the image size.

        Returns:
            np.ndarray: Transformed keypoints.

        """
        rotated = fgeometric.keypoints_affine(
            keypoints,
            matrix,
            params["shape"][:2],
            scale={"x": 1, "y": 1},  # pure rotation: no scaling
            border_mode=self.border_mode,
        )
        if not self.crop_border:
            return rotated
        crop_coords = (x_min, y_min, x_max, y_max)
        return fcrops.crop_keypoints_by_coords(rotated, crop_coords)

    @staticmethod
    def _rotated_rect_with_max_area(
        height: int,
        width: int,
        angle: float,
    ) -> dict[str, int]:
        """Compute crop bounds of the largest axis-aligned rectangle inside a rotated rectangle.

        A ``width`` x ``height`` rectangle rotated by ``angle`` degrees contains a
        maximal-area axis-aligned rectangle. This returns that rectangle as integer
        bounds centered on (width / 2, height / 2) and clamped to the original frame.

        Args:
            height (int): Height of the original rectangle.
            width (int): Width of the original rectangle.
            angle (float): Rotation angle in degrees.

        Returns:
            dict[str, int]: Keys "x_min", "x_max", "y_min", "y_max".

        References:
            Rotate image and crop out black borders:
            https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders

        """
        theta = math.radians(angle)

        width_is_longer = width >= height
        if width_is_longer:
            side_long, side_short = width, height
        else:
            side_long, side_short = height, width

        # The result is the same for angle, -angle and 180 - angle, so only the
        # absolute values of sin and cos (first quadrant) are needed.
        sin_a = abs(math.sin(theta))
        cos_a = abs(math.cos(theta))

        half_constrained = side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < SMALL_NUMBER
        if half_constrained:
            # Two crop corners touch the longer side; the other two lie on the
            # mid-line parallel to the longer side.
            x = 0.5 * side_short
            if width_is_longer:
                wr, hr = x / sin_a, x / cos_a
            else:
                wr, hr = x / cos_a, x / sin_a
        else:
            # Fully constrained: the crop touches all four sides.
            cos_2a = cos_a * cos_a - sin_a * sin_a
            wr = (width * cos_a - height * sin_a) / cos_2a
            hr = (height * cos_a - width * sin_a) / cos_2a

        center_x, half_w = width / 2, wr / 2
        center_y, half_h = height / 2, hr / 2

        return {
            "x_min": max(0, int(center_x - half_w)),
            "x_max": min(width, int(center_x + half_w)),
            "y_min": max(0, int(center_y - half_h)),
            "y_max": min(height, int(center_y + half_h)),
        }

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Sample the angle and build the matrices and crop bounds for this call.

        Args:
            params (dict[str, Any]): Transform parameters; ``params["shape"]`` is read.
            data (dict[str, Any]): Input data (unused here).

        Returns:
            dict[str, Any]: Crop bounds "x_min", "x_max", "y_min", "y_max" (all -1 when
                crop_border is False) plus "matrix" for images, masks and keypoints and
                "bbox_matrix" for bounding boxes.

        """
        angle = self.py_random.uniform(*self.limit)
        image_shape = params["shape"][:2]

        if self.crop_border:
            height, width = image_shape
            crop_bounds = self._rotated_rect_with_max_area(height, width, angle)
        else:
            # Placeholders: the apply_* methods ignore them when crop_border is False.
            crop_bounds = {"x_min": -1, "x_max": -1, "y_min": -1, "y_max": -1}

        # Bounding boxes use their own center definition, hence a second matrix.
        image_center = fgeometric.center(image_shape)
        bbox_center = fgeometric.center_bbox(image_shape)

        # Pure rotation: identity translation, shear and scale.
        no_translate: dict[str, int] = {"x": 0, "y": 0}
        no_shear: dict[str, float] = {"x": 0, "y": 0}
        unit_scale: dict[str, float] = {"x": 1, "y": 1}

        matrix = fgeometric.create_affine_transformation_matrix(
            no_translate,
            no_shear,
            unit_scale,
            angle,
            image_center,
        )
        bbox_matrix = fgeometric.create_affine_transformation_matrix(
            no_translate,
            no_shear,
            unit_scale,
            angle,
            bbox_center,
        )

        return {**crop_bounds, "matrix": matrix, "bbox_matrix": bbox_matrix}
class SafeRotate(Affine):
    """Rotate the input inside its own frame by an angle drawn uniformly at random from ``limit``.

    The rotated content is scaled so that its bounding box maps back onto the original
    frame. The output keeps the input's dimensions, but because width and height are
    rescaled independently the content may be distorted.

    Args:
        limit (float | tuple[float, float]): Range from which the angle (in degrees) is sampled.
            A single float means the range (-limit, limit). Default: (-90, 90)
        interpolation (OpenCV flag): Interpolation algorithm used for the image. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): Pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_CONSTANT
        rotate_method (Literal["largest_box", "ellipse"]): Method used to rotate bounding boxes.
            Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
        mask_interpolation (OpenCV flag): Interpolation algorithm used for masks.
            Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_NEAREST.
        fill (tuple[float, ...] | float): Padding value for images if border_mode is cv2.BORDER_CONSTANT.
            Default: 0
        fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
            cv2.BORDER_CONSTANT. Default: 0
        p (float): Probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints, volume, mask3d

    Image types:
        uint8, float32

    Note:
        - The rotation is performed around the center of the image.
        - After rotation, the image is scaled to fit within the original frame, which may
          cause some distortion.
        - The output image will always have the same dimensions as the input image.
        - Bounding boxes and keypoints are transformed along with the image.

    Mathematical Details:
        1. An angle θ is randomly sampled from the range specified by 'limit'.
        2. A 2x3 rotation matrix about the image center (cx, cy) is obtained from
           cv2.getRotationMatrix2D((cx, cy), θ, 1.0):
           R = [ cos(θ)  sin(θ)  (1 - cos(θ)) * cx - sin(θ) * cy]
               [-sin(θ)  cos(θ)  sin(θ) * cx + (1 - cos(θ)) * cy]
        3. The size of the rotated content's bounding box is computed (truncated to integers):
           new_w = int(height * |sin(θ)| + width * |cos(θ)|)
           new_h = int(height * |cos(θ)| + width * |sin(θ)|)
        4. The translation column of R is shifted by (new_w / 2 - cx, new_h / 2 - cy) so the
           rotated content is centered in a new_w x new_h canvas.
        5. Independent per-axis scale factors map that canvas back onto the original frame:
           scale_x = width / new_w
           scale_y = height / new_h
        6. The final 3x3 matrix is T = diag(scale_x, scale_y, 1) @ [R; 0 0 1].

    Examples:
        >>> import numpy as np
        >>> import albumentations as A
        >>> # Create example data
        >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
        >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
        >>> bbox_labels = [1, 2]  # Class labels for bounding boxes
        >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
        >>> keypoint_labels = [0, 1]  # Labels for keypoints
        >>> # Define the transform
        >>> transform = A.Compose([
        ...     A.SafeRotate(limit=45, p=1.0),
        ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
        ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
        >>> # Apply the transform to all targets
        >>> transformed = transform(
        ...     image=image,
        ...     mask=mask,
        ...     bboxes=bboxes,
        ...     bbox_labels=bbox_labels,
        ...     keypoints=keypoints,
        ...     keypoint_labels=keypoint_labels
        ... )
        >>> rotated_image = transformed["image"]
        >>> rotated_mask = transformed["mask"]
        >>> rotated_bboxes = transformed["bboxes"]
        >>> rotated_bbox_labels = transformed["bbox_labels"]
        >>> rotated_keypoints = transformed["keypoints"]
        >>> rotated_keypoint_labels = transformed["keypoint_labels"]

    """

    _targets = ALL_TARGETS

    class InitSchema(RotateInitSchema):
        rotate_method: Literal["largest_box", "ellipse"]

    def __init__(
        self,
        limit: tuple[float, float] | float = (-90, 90),
        interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_LINEAR,
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ] = cv2.BORDER_CONSTANT,
        rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
        mask_interpolation: Literal[
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_CUBIC,
            cv2.INTER_AREA,
            cv2.INTER_LANCZOS4,
        ] = cv2.INTER_NEAREST,
        fill: tuple[float, ...] | float = 0,
        fill_mask: tuple[float, ...] | float = 0,
        p: float = 0.5,
    ):
        # The angle range is handed to the parent as its ``rotate`` argument.
        super().__init__(
            rotate=limit,
            rotate_method=rotate_method,
            fit_output=True,
            interpolation=interpolation,
            mask_interpolation=mask_interpolation,
            border_mode=border_mode,
            fill=fill,
            fill_mask=fill_mask,
            p=p,
        )
        # Kept separately because get_params_dependent_on_data samples from self.limit.
        self.limit = cast("tuple[float, float]", limit)

    def _create_safe_rotate_matrix(
        self,
        angle: float,
        center: tuple[float, float],
        image_shape: tuple[int, int],
    ) -> tuple[np.ndarray, dict[str, float]]:
        """Build the rotate-then-rescale matrix that keeps the content inside the frame.

        Args:
            angle (float): Rotation angle in degrees.
            center (tuple[float, float]): Center of rotation as (x, y).
            image_shape (tuple[int, int]): Image shape as (height, width).

        Returns:
            tuple[np.ndarray, dict[str, float]]: The 3x3 transformation matrix and the
                per-axis scale factors as ``{"x": scale_x, "y": scale_y}``.

        """
        height, width = image_shape[:2]
        rotation = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Size of the axis-aligned bounding box of the rotated image.
        abs_cos = abs(rotation[0, 0])
        abs_sin = abs(rotation[0, 1])
        new_w = int(height * abs_sin + width * abs_cos)
        new_h = int(height * abs_cos + width * abs_sin)

        # Re-center the rotated content inside the new_w x new_h canvas.
        rotation[0, 2] += new_w / 2 - center[0]
        rotation[1, 2] += new_h / 2 - center[1]

        # Per-axis factors that squeeze the enlarged canvas back to the original size.
        scale_x = width / new_w
        scale_y = height / new_h
        scaling = np.diag([scale_x, scale_y, 1.0])

        # Promote the 2x3 rotation to homogeneous 3x3 form, then apply the scaling after it.
        homogeneous_rotation = np.vstack([rotation, [0, 0, 1]])
        matrix = scaling @ homogeneous_rotation

        return matrix, {"x": scale_x, "y": scale_y}

    def get_params_dependent_on_data(
        self,
        params: dict[str, Any],
        data: dict[str, Any],
    ) -> dict[str, Any]:
        """Sample the angle and build the image and bounding-box matrices for this call.

        Args:
            params (dict[str, Any]): Transform parameters; ``params["shape"]`` is read.
            data (dict[str, Any]): Input data (unused here).

        Returns:
            dict[str, Any]: "rotate" (sampled angle), "scale" (per-axis scale factors),
                "matrix", "bbox_matrix" and "output_shape" (the input's height and width).

        """
        image_shape = params["shape"][:2]
        angle = self.py_random.uniform(*self.limit)

        # Images/keypoints and bounding boxes use different center definitions,
        # so one matrix is built for each.
        matrix, scale = self._create_safe_rotate_matrix(
            angle,
            fgeometric.center(image_shape),
            image_shape,
        )
        # The scale depends only on the angle and shape, so the second copy is discarded.
        bbox_matrix, _ = self._create_safe_rotate_matrix(
            angle,
            fgeometric.center_bbox(image_shape),
            image_shape,
        )

        return {
            "rotate": angle,
            "scale": scale,
            "matrix": matrix,
            "bbox_matrix": bbox_matrix,
            "output_shape": image_shape,
        }
|