rotate.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. """Transforms for rotating images and associated data.
  2. This module provides classes for rotating images, masks, bounding boxes, and keypoints.
  3. Includes transforms for 90-degree rotations and arbitrary angle rotations with various
  4. border handling options.
  5. """
  6. from __future__ import annotations
  7. import math
  8. from typing import Any, cast
  9. import cv2
  10. import numpy as np
  11. from typing_extensions import Literal
  12. from albumentations.augmentations.crops import functional as fcrops
  13. from albumentations.augmentations.geometric.transforms import Affine
  14. from albumentations.core.pydantic import SymmetricRangeType
  15. from albumentations.core.transforms_interface import (
  16. BaseTransformInitSchema,
  17. DualTransform,
  18. )
  19. from albumentations.core.type_definitions import ALL_TARGETS
  20. from . import functional as fgeometric
  21. __all__ = ["RandomRotate90", "Rotate", "SafeRotate"]
  22. SMALL_NUMBER = 1e-10
  23. class RandomRotate90(DualTransform):
  24. """Randomly rotate the input by 90 degrees zero or more times.
  25. Even with p=1.0, the transform has a 1/4 probability of being identity:
  26. - With probability p * 1/4: no rotation (0 degrees)
  27. - With probability p * 1/4: rotate 90 degrees
  28. - With probability p * 1/4: rotate 180 degrees
  29. - With probability p * 1/4: rotate 270 degrees
  30. For example:
  31. - With p=1.0: Each rotation angle (including 0°) has 0.25 probability
  32. - With p=0.8: Each rotation angle has 0.2 probability, and no transform has 0.2 probability
  33. - With p=0.5: Each rotation angle has 0.125 probability, and no transform has 0.5 probability
  34. Common applications:
  35. - Aerial/satellite imagery: Objects can appear in any orientation
  36. - Medical imaging: Scans/slides may not have a consistent orientation
  37. - Document analysis: Pages or symbols might be rotated
  38. - Microscopy: Cell orientation is often arbitrary
  39. - Game development: Sprites/textures that should work in multiple orientations
  40. Not recommended for:
  41. - Natural scene images where gravity matters (e.g., landscape photography)
  42. - Face detection/recognition tasks
  43. - Text recognition (unless text can appear rotated)
  44. - Tasks where object orientation is important for classification
  45. Note:
  46. If your domain has both 90-degree rotation AND flip symmetries
  47. (e.g., satellite imagery, microscopy), consider using `D4` transform instead.
  48. `D4` is more efficient and mathematically correct as it:
  49. - Samples uniformly from all 8 possible combinations of rotations and flips
  50. - Properly represents the dihedral group D4 symmetries
  51. - Avoids potential correlation between separate rotation and flip augmentations
  52. Args:
  53. p (float): probability of applying the transform. Default: 1.0.
  54. Note that even with p=1.0, there's still a 0.25 probability
  55. of getting a 0-degree rotation (identity transform).
  56. Targets:
  57. image, mask, bboxes, keypoints, volume, mask3d
  58. Image types:
  59. uint8, float32
  60. Examples:
  61. >>> import numpy as np
  62. >>> import albumentations as A
  63. >>> # Create example data
  64. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  65. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  66. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  67. >>> bbox_labels = [1, 2] # Class labels for bounding boxes
  68. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  69. >>> keypoint_labels = [0, 1] # Labels for keypoints
  70. >>> # Define the transform
  71. >>> transform = A.Compose([
  72. ... A.RandomRotate90(p=1.0),
  73. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  74. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  75. >>> # Apply the transform to all targets
  76. >>> transformed = transform(
  77. ... image=image,
  78. ... mask=mask,
  79. ... bboxes=bboxes,
  80. ... bbox_labels=bbox_labels,
  81. ... keypoints=keypoints,
  82. ... keypoint_labels=keypoint_labels
  83. ... )
  84. >>> rotated_image = transformed["image"]
  85. >>> rotated_mask = transformed["mask"]
  86. >>> rotated_bboxes = transformed["bboxes"]
  87. >>> rotated_bbox_labels = transformed["bbox_labels"]
  88. >>> rotated_keypoints = transformed["keypoints"]
  89. >>> rotated_keypoint_labels = transformed["keypoint_labels"]
  90. """
  91. _targets = ALL_TARGETS
  92. def __init__(
  93. self,
  94. p: float = 1,
  95. ):
  96. super().__init__(p=p)
  97. def apply(self, img: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
  98. """Apply rotation to the input image.
  99. Args:
  100. img (np.ndarray): Image to rotate.
  101. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  102. **params (Any): Additional parameters.
  103. Returns:
  104. np.ndarray: Rotated image.
  105. """
  106. return fgeometric.rot90(img, factor)
  107. def get_params(self) -> dict[str, int]:
  108. """Get parameters for the transform.
  109. Returns:
  110. dict[str, int]: Dictionary with the rotation factor.
  111. """
  112. # Random int in the range [0, 3]
  113. return {"factor": self.py_random.randint(0, 3)}
  114. def apply_to_bboxes(
  115. self,
  116. bboxes: np.ndarray,
  117. factor: Literal[0, 1, 2, 3],
  118. **params: Any,
  119. ) -> np.ndarray:
  120. """Apply rotation to bounding boxes.
  121. Args:
  122. bboxes (np.ndarray): Bounding boxes to rotate.
  123. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  124. **params (Any): Additional parameters.
  125. Returns:
  126. np.ndarray: Rotated bounding boxes.
  127. """
  128. return fgeometric.bboxes_rot90(bboxes, factor)
  129. def apply_to_keypoints(
  130. self,
  131. keypoints: np.ndarray,
  132. factor: Literal[0, 1, 2, 3],
  133. **params: Any,
  134. ) -> np.ndarray:
  135. """Apply rotation to keypoints.
  136. Args:
  137. keypoints (np.ndarray): Keypoints to rotate.
  138. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  139. **params (Any): Additional parameters.
  140. Returns:
  141. np.ndarray: Rotated keypoints.
  142. """
  143. return fgeometric.keypoints_rot90(keypoints, factor, params["shape"])
  144. def apply_to_volume(self, volume: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
  145. """Apply rotation to the input volume.
  146. Args:
  147. volume (np.ndarray): Volume to rotate.
  148. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  149. **params (Any): Additional parameters.
  150. Returns:
  151. np.ndarray: Rotated volume.
  152. """
  153. return fgeometric.volume_rot90(volume, factor)
  154. def apply_to_volumes(self, volumes: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
  155. """Apply rotation to the input volumes.
  156. Args:
  157. volumes (np.ndarray): Volumes to rotate.
  158. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  159. **params (Any): Additional parameters.
  160. Returns:
  161. np.ndarray: Rotated volumes.
  162. """
  163. return fgeometric.volumes_rot90(volumes, factor)
  164. def apply_to_mask3d(self, mask3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
  165. """Apply rotation to the input mask3d.
  166. Args:
  167. mask3d (np.ndarray): Mask3d to rotate.
  168. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  169. **params (Any): Additional parameters.
  170. Returns:
  171. np.ndarray: Rotated mask3d.
  172. """
  173. return fgeometric.volume_rot90(mask3d, factor)
  174. def apply_to_masks3d(self, masks3d: np.ndarray, factor: Literal[0, 1, 2, 3], **params: Any) -> np.ndarray:
  175. """Apply rotation to the input masks3d.
  176. Args:
  177. masks3d (np.ndarray): Masks3d to rotate.
  178. factor (Literal[0, 1, 2, 3]): Number of times to rotate by 90 degrees.
  179. **params (Any): Additional parameters.
  180. Returns:
  181. np.ndarray: Rotated masks3d.
  182. """
  183. return fgeometric.volumes_rot90(masks3d, factor)
  184. class RotateInitSchema(BaseTransformInitSchema):
  185. limit: SymmetricRangeType
  186. interpolation: Literal[cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
  187. mask_interpolation: Literal[
  188. cv2.INTER_NEAREST,
  189. cv2.INTER_LINEAR,
  190. cv2.INTER_CUBIC,
  191. cv2.INTER_AREA,
  192. cv2.INTER_LANCZOS4,
  193. ]
  194. border_mode: Literal[
  195. cv2.BORDER_CONSTANT,
  196. cv2.BORDER_REPLICATE,
  197. cv2.BORDER_REFLECT,
  198. cv2.BORDER_WRAP,
  199. cv2.BORDER_REFLECT_101,
  200. ]
  201. fill: tuple[float, ...] | float
  202. fill_mask: tuple[float, ...] | float | None
  203. class Rotate(DualTransform):
  204. """Rotate the input by an angle selected randomly from the uniform distribution.
  205. Args:
  206. limit (float | tuple[float, float]): Range from which a random angle is picked. If limit is a single float,
  207. an angle is picked from (-limit, limit). Default: (-90, 90)
  208. interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
  209. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  210. Default: cv2.INTER_LINEAR.
  211. border_mode (OpenCV flag): Flag that is used to specify the pixel extrapolation method. Should be one of:
  212. cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
  213. Default: cv2.BORDER_CONSTANT
  214. fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT.
  215. fill_mask (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT applied for masks.
  216. rotate_method (Literal["largest_box", "ellipse"]): Method to rotate bounding boxes.
  217. Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
  218. crop_border (bool): Whether to crop border after rotation. If True, the output image size might differ
  219. from the input. Default: False
  220. mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
  221. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  222. Default: cv2.INTER_NEAREST.
  223. p (float): Probability of applying the transform. Default: 0.5.
  224. Targets:
  225. image, mask, bboxes, keypoints, volume, mask3d
  226. Image types:
  227. uint8, float32
  228. Note:
  229. - The rotation angle is randomly selected for each execution within the range specified by 'limit'.
  230. - When 'crop_border' is False, the output image will have the same size as the input, potentially
  231. introducing black triangles in the corners.
  232. - When 'crop_border' is True, the output image is cropped to remove black triangles, which may result
  233. in a smaller image.
  234. - Bounding boxes are rotated and may change size or shape.
  235. - Keypoints are rotated around the center of the image.
  236. Mathematical Details:
  237. 1. An angle θ is randomly sampled from the range specified by 'limit'.
  238. 2. The image is rotated around its center by θ degrees.
  239. 3. The rotation matrix R is:
  240. R = [cos(θ) -sin(θ)]
  241. [sin(θ) cos(θ)]
  242. 4. Each point (x, y) in the image is transformed to (x', y') by:
  243. [x'] [cos(θ) -sin(θ)] [x - cx] [cx]
  244. [y'] = [sin(θ) cos(θ)] [y - cy] + [cy]
  245. where (cx, cy) is the center of the image.
  246. 5. If 'crop_border' is True, the image is cropped to the largest rectangle that fits inside the rotated image.
  247. Examples:
  248. >>> import numpy as np
  249. >>> import albumentations as A
  250. >>> # Create example data
  251. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  252. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  253. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  254. >>> bbox_labels = [1, 2] # Class labels for bounding boxes
  255. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  256. >>> keypoint_labels = [0, 1] # Labels for keypoints
  257. >>> # Define the transform
  258. >>> transform = A.Compose([
  259. ... A.Rotate(limit=45, p=1.0),
  260. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  261. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  262. >>> # Apply the transform to all targets
  263. >>> transformed = transform(
  264. ... image=image,
  265. ... mask=mask,
  266. ... bboxes=bboxes,
  267. ... bbox_labels=bbox_labels,
  268. ... keypoints=keypoints,
  269. ... keypoint_labels=keypoint_labels
  270. ... )
  271. >>> rotated_image = transformed["image"]
  272. >>> rotated_mask = transformed["mask"]
  273. >>> rotated_bboxes = transformed["bboxes"]
  274. >>> rotated_bbox_labels = transformed["bbox_labels"]
  275. >>> rotated_keypoints = transformed["keypoints"]
  276. >>> rotated_keypoint_labels = transformed["keypoint_labels"]
  277. """
  278. _targets = ALL_TARGETS
  279. class InitSchema(RotateInitSchema):
  280. rotate_method: Literal["largest_box", "ellipse"]
  281. crop_border: bool
  282. fill: tuple[float, ...] | float
  283. fill_mask: tuple[float, ...] | float
  284. def __init__(
  285. self,
  286. limit: tuple[float, float] | float = (-90, 90),
  287. interpolation: Literal[
  288. cv2.INTER_NEAREST,
  289. cv2.INTER_LINEAR,
  290. cv2.INTER_CUBIC,
  291. cv2.INTER_AREA,
  292. cv2.INTER_LANCZOS4,
  293. ] = cv2.INTER_LINEAR,
  294. border_mode: Literal[
  295. cv2.BORDER_CONSTANT,
  296. cv2.BORDER_REPLICATE,
  297. cv2.BORDER_REFLECT,
  298. cv2.BORDER_WRAP,
  299. cv2.BORDER_REFLECT_101,
  300. ] = cv2.BORDER_CONSTANT,
  301. rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
  302. crop_border: bool = False,
  303. mask_interpolation: Literal[
  304. cv2.INTER_NEAREST,
  305. cv2.INTER_LINEAR,
  306. cv2.INTER_CUBIC,
  307. cv2.INTER_AREA,
  308. cv2.INTER_LANCZOS4,
  309. ] = cv2.INTER_NEAREST,
  310. fill: tuple[float, ...] | float = 0,
  311. fill_mask: tuple[float, ...] | float = 0,
  312. p: float = 0.5,
  313. ):
  314. super().__init__(p=p)
  315. self.limit = cast("tuple[float, float]", limit)
  316. self.interpolation = interpolation
  317. self.mask_interpolation = mask_interpolation
  318. self.border_mode = border_mode
  319. self.fill = fill
  320. self.fill_mask = fill_mask
  321. self.rotate_method = rotate_method
  322. self.crop_border = crop_border
  323. def apply(
  324. self,
  325. img: np.ndarray,
  326. matrix: np.ndarray,
  327. x_min: int,
  328. x_max: int,
  329. y_min: int,
  330. y_max: int,
  331. **params: Any,
  332. ) -> np.ndarray:
  333. """Apply affine transformation to the image.
  334. Args:
  335. img (np.ndarray): Image to transform.
  336. matrix (np.ndarray): Affine transformation matrix.
  337. x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
  338. x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
  339. y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
  340. y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
  341. **params (Any): Additional parameters.
  342. Returns:
  343. np.ndarray: Transformed image.
  344. """
  345. img_out = fgeometric.warp_affine(
  346. img,
  347. matrix,
  348. self.interpolation,
  349. self.fill,
  350. self.border_mode,
  351. params["shape"][:2],
  352. )
  353. if self.crop_border:
  354. return fcrops.crop(img_out, x_min, y_min, x_max, y_max)
  355. return img_out
  356. def apply_to_mask(
  357. self,
  358. mask: np.ndarray,
  359. matrix: np.ndarray,
  360. x_min: int,
  361. x_max: int,
  362. y_min: int,
  363. y_max: int,
  364. **params: Any,
  365. ) -> np.ndarray:
  366. """Apply affine transformation to the mask.
  367. Args:
  368. mask (np.ndarray): Mask to transform.
  369. matrix (np.ndarray): Affine transformation matrix.
  370. x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
  371. x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
  372. y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
  373. y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
  374. **params (Any): Additional parameters.
  375. Returns:
  376. np.ndarray: Transformed mask.
  377. """
  378. img_out = fgeometric.warp_affine(
  379. mask,
  380. matrix,
  381. self.mask_interpolation,
  382. self.fill_mask,
  383. self.border_mode,
  384. params["shape"][:2],
  385. )
  386. if self.crop_border:
  387. return fcrops.crop(img_out, x_min, y_min, x_max, y_max)
  388. return img_out
  389. def apply_to_bboxes(
  390. self,
  391. bboxes: np.ndarray,
  392. bbox_matrix: np.ndarray,
  393. x_min: int,
  394. x_max: int,
  395. y_min: int,
  396. y_max: int,
  397. **params: Any,
  398. ) -> np.ndarray:
  399. """Apply affine transformation to bounding boxes.
  400. Args:
  401. bboxes (np.ndarray): Bounding boxes to transform.
  402. bbox_matrix (np.ndarray): Affine transformation matrix for bounding boxes.
  403. x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
  404. x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
  405. y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
  406. y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
  407. **params (Any): Additional parameters.
  408. Returns:
  409. np.ndarray: Transformed bounding boxes.
  410. """
  411. image_shape = params["shape"][:2]
  412. bboxes_out = fgeometric.bboxes_affine(
  413. bboxes,
  414. bbox_matrix,
  415. self.rotate_method,
  416. image_shape,
  417. self.border_mode,
  418. image_shape,
  419. )
  420. if self.crop_border:
  421. return fcrops.crop_bboxes_by_coords(
  422. bboxes_out,
  423. (x_min, y_min, x_max, y_max),
  424. image_shape,
  425. )
  426. return bboxes_out
  427. def apply_to_keypoints(
  428. self,
  429. keypoints: np.ndarray,
  430. matrix: np.ndarray,
  431. x_min: int,
  432. x_max: int,
  433. y_min: int,
  434. y_max: int,
  435. **params: Any,
  436. ) -> np.ndarray:
  437. """Apply affine transformation to keypoints.
  438. Args:
  439. keypoints (np.ndarray): Keypoints to transform.
  440. matrix (np.ndarray): Affine transformation matrix.
  441. x_min (int): Minimum x-coordinate for cropping (if crop_border is True).
  442. x_max (int): Maximum x-coordinate for cropping (if crop_border is True).
  443. y_min (int): Minimum y-coordinate for cropping (if crop_border is True).
  444. y_max (int): Maximum y-coordinate for cropping (if crop_border is True).
  445. **params (Any): Additional parameters.
  446. Returns:
  447. np.ndarray: Transformed keypoints.
  448. """
  449. keypoints_out = fgeometric.keypoints_affine(
  450. keypoints,
  451. matrix,
  452. params["shape"][:2],
  453. scale={"x": 1, "y": 1},
  454. border_mode=self.border_mode,
  455. )
  456. if self.crop_border:
  457. return fcrops.crop_keypoints_by_coords(
  458. keypoints_out,
  459. (x_min, y_min, x_max, y_max),
  460. )
  461. return keypoints_out
  462. @staticmethod
  463. def _rotated_rect_with_max_area(
  464. height: int,
  465. width: int,
  466. angle: float,
  467. ) -> dict[str, int]:
  468. """Given a rectangle of size wxh that has been rotated by 'angle' (in
  469. degrees), computes the width and height of the largest possible
  470. axis-aligned rectangle (maximal area) within the rotated rectangle.
  471. References:
  472. Rotate image and crop out black borders: https://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
  473. """
  474. angle = math.radians(angle)
  475. width_is_longer = width >= height
  476. side_long, side_short = (width, height) if width_is_longer else (height, width)
  477. # since the solutions for angle, -angle and 180-angle are all the same,
  478. # it is sufficient to look at the first quadrant and the absolute values of sin,cos:
  479. sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
  480. if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < SMALL_NUMBER:
  481. # half constrained case: two crop corners touch the longer side,
  482. # the other two corners are on the mid-line parallel to the longer line
  483. x = 0.5 * side_short
  484. wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
  485. else:
  486. # fully constrained case: crop touches all 4 sides
  487. cos_2a = cos_a * cos_a - sin_a * sin_a
  488. wr, hr = (
  489. (width * cos_a - height * sin_a) / cos_2a,
  490. (height * cos_a - width * sin_a) / cos_2a,
  491. )
  492. return {
  493. "x_min": max(0, int(width / 2 - wr / 2)),
  494. "x_max": min(width, int(width / 2 + wr / 2)),
  495. "y_min": max(0, int(height / 2 - hr / 2)),
  496. "y_max": min(height, int(height / 2 + hr / 2)),
  497. }
  498. def get_params_dependent_on_data(
  499. self,
  500. params: dict[str, Any],
  501. data: dict[str, Any],
  502. ) -> dict[str, Any]:
  503. """Get parameters dependent on the data.
  504. Args:
  505. params (dict[str, Any]): Dictionary containing parameters.
  506. data (dict[str, Any]): Dictionary containing data.
  507. Returns:
  508. dict[str, Any]: Dictionary with parameters for transformation.
  509. """
  510. angle = self.py_random.uniform(*self.limit)
  511. if self.crop_border:
  512. height, width = params["shape"][:2]
  513. out_params = self._rotated_rect_with_max_area(height, width, angle)
  514. else:
  515. out_params = {"x_min": -1, "x_max": -1, "y_min": -1, "y_max": -1}
  516. center = fgeometric.center(params["shape"][:2])
  517. bbox_center = fgeometric.center_bbox(params["shape"][:2])
  518. translate: dict[str, int] = {"x": 0, "y": 0}
  519. shear: dict[str, float] = {"x": 0, "y": 0}
  520. scale: dict[str, float] = {"x": 1, "y": 1}
  521. rotate = angle
  522. matrix = fgeometric.create_affine_transformation_matrix(
  523. translate,
  524. shear,
  525. scale,
  526. rotate,
  527. center,
  528. )
  529. bbox_matrix = fgeometric.create_affine_transformation_matrix(
  530. translate,
  531. shear,
  532. scale,
  533. rotate,
  534. bbox_center,
  535. )
  536. out_params["matrix"] = matrix
  537. out_params["bbox_matrix"] = bbox_matrix
  538. return out_params
  539. class SafeRotate(Affine):
  540. """Rotate the input inside the input's frame by an angle selected randomly from the uniform distribution.
  541. This transformation ensures that the entire rotated image fits within the original frame by scaling it
  542. down if necessary. The resulting image maintains its original dimensions but may contain artifacts due to the
  543. rotation and scaling process.
  544. Args:
  545. limit (float | tuple[float, float]): Range from which a random angle is picked. If limit is a single float,
  546. an angle is picked from (-limit, limit). Default: (-90, 90)
  547. interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
  548. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  549. Default: cv2.INTER_LINEAR.
  550. border_mode (OpenCV flag): Flag that is used to specify the pixel extrapolation method. Should be one of:
  551. cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
  552. Default: cv2.BORDER_REFLECT_101
  553. fill (tuple[float, float] | float): Padding value if border_mode is cv2.BORDER_CONSTANT.
  554. fill_mask (tuple[float, float] | float): Padding value if border_mode is cv2.BORDER_CONSTANT applied
  555. for masks.
  556. rotate_method (Literal["largest_box", "ellipse"]): Method to rotate bounding boxes.
  557. Should be 'largest_box' or 'ellipse'. Default: 'largest_box'
  558. mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
  559. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  560. Default: cv2.INTER_NEAREST.
  561. p (float): Probability of applying the transform. Default: 0.5.
  562. Targets:
  563. image, mask, bboxes, keypoints, volume, mask3d
  564. Image types:
  565. uint8, float32
  566. Note:
  567. - The rotation is performed around the center of the image.
  568. - After rotation, the image is scaled to fit within the original frame, which may cause some distortion.
  569. - The output image will always have the same dimensions as the input image.
  570. - Bounding boxes and keypoints are transformed along with the image.
  571. Mathematical Details:
  572. 1. An angle θ is randomly sampled from the range specified by 'limit'.
  573. 2. The image is rotated around its center by θ degrees.
  574. 3. The rotation matrix R is:
  575. R = [cos(θ) -sin(θ)]
  576. [sin(θ) cos(θ)]
  577. 4. The scaling factor s is calculated to ensure the rotated image fits within the original frame:
  578. s = min(width / (width * |cos(θ)| + height * |sin(θ)|),
  579. height / (width * |sin(θ)| + height * |cos(θ)|))
  580. 5. The combined transformation matrix T is:
  581. T = [s*cos(θ) -s*sin(θ) tx]
  582. [s*sin(θ) s*cos(θ) ty]
  583. where tx and ty are translation factors to keep the image centered.
  584. 6. Each point (x, y) in the image is transformed to (x', y') by:
  585. [x'] [s*cos(θ) s*sin(θ)] [x - cx] [cx]
  586. [y'] = [-s*sin(θ) s*cos(θ)] [y - cy] + [cy]
  587. where (cx, cy) is the center of the image.
  588. Examples:
  589. >>> import numpy as np
  590. >>> import albumentations as A
  591. >>> # Create example data
  592. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  593. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  594. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  595. >>> bbox_labels = [1, 2] # Class labels for bounding boxes
  596. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  597. >>> keypoint_labels = [0, 1] # Labels for keypoints
  598. >>> # Define the transform
  599. >>> transform = A.Compose([
  600. ... A.SafeRotate(limit=45, p=1.0),
  601. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  602. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  603. >>> # Apply the transform to all targets
  604. >>> transformed = transform(
  605. ... image=image,
  606. ... mask=mask,
  607. ... bboxes=bboxes,
  608. ... bbox_labels=bbox_labels,
  609. ... keypoints=keypoints,
  610. ... keypoint_labels=keypoint_labels
  611. ... )
  612. >>> rotated_image = transformed["image"]
  613. >>> rotated_mask = transformed["mask"]
  614. >>> rotated_bboxes = transformed["bboxes"]
  615. >>> rotated_bbox_labels = transformed["bbox_labels"]
  616. >>> rotated_keypoints = transformed["keypoints"]
  617. >>> rotated_keypoint_labels = transformed["keypoint_labels"]
  618. """
  619. _targets = ALL_TARGETS
  620. class InitSchema(RotateInitSchema):
  621. rotate_method: Literal["largest_box", "ellipse"]
  622. def __init__(
  623. self,
  624. limit: tuple[float, float] | float = (-90, 90),
  625. interpolation: Literal[
  626. cv2.INTER_NEAREST,
  627. cv2.INTER_LINEAR,
  628. cv2.INTER_CUBIC,
  629. cv2.INTER_AREA,
  630. cv2.INTER_LANCZOS4,
  631. ] = cv2.INTER_LINEAR,
  632. border_mode: Literal[
  633. cv2.BORDER_CONSTANT,
  634. cv2.BORDER_REPLICATE,
  635. cv2.BORDER_REFLECT,
  636. cv2.BORDER_WRAP,
  637. cv2.BORDER_REFLECT_101,
  638. ] = cv2.BORDER_CONSTANT,
  639. rotate_method: Literal["largest_box", "ellipse"] = "largest_box",
  640. mask_interpolation: Literal[
  641. cv2.INTER_NEAREST,
  642. cv2.INTER_LINEAR,
  643. cv2.INTER_CUBIC,
  644. cv2.INTER_AREA,
  645. cv2.INTER_LANCZOS4,
  646. ] = cv2.INTER_NEAREST,
  647. fill: tuple[float, ...] | float = 0,
  648. fill_mask: tuple[float, ...] | float = 0,
  649. p: float = 0.5,
  650. ):
  651. super().__init__(
  652. rotate=limit,
  653. interpolation=interpolation,
  654. border_mode=border_mode,
  655. fill=fill,
  656. fill_mask=fill_mask,
  657. rotate_method=rotate_method,
  658. fit_output=True,
  659. mask_interpolation=mask_interpolation,
  660. p=p,
  661. )
  662. self.limit = cast("tuple[float, float]", limit)
  663. def _create_safe_rotate_matrix(
  664. self,
  665. angle: float,
  666. center: tuple[float, float],
  667. image_shape: tuple[int, int],
  668. ) -> tuple[np.ndarray, dict[str, float]]:
  669. height, width = image_shape[:2]
  670. rotation_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
  671. # Calculate new image size
  672. abs_cos = abs(rotation_mat[0, 0])
  673. abs_sin = abs(rotation_mat[0, 1])
  674. new_w = int(height * abs_sin + width * abs_cos)
  675. new_h = int(height * abs_cos + width * abs_sin)
  676. # Adjust the rotation matrix to take into account the new size
  677. rotation_mat[0, 2] += new_w / 2 - center[0]
  678. rotation_mat[1, 2] += new_h / 2 - center[1]
  679. # Calculate scaling factors
  680. scale_x = width / new_w
  681. scale_y = height / new_h
  682. # Create scaling matrix
  683. scale_mat = np.array([[scale_x, 0, 0], [0, scale_y, 0], [0, 0, 1]])
  684. # Combine rotation and scaling
  685. matrix = scale_mat @ np.vstack([rotation_mat, [0, 0, 1]])
  686. return matrix, {"x": scale_x, "y": scale_y}
  687. def get_params_dependent_on_data(
  688. self,
  689. params: dict[str, Any],
  690. data: dict[str, Any],
  691. ) -> dict[str, Any]:
  692. """Get parameters dependent on the data.
  693. Args:
  694. params (dict[str, Any]): Dictionary containing parameters.
  695. data (dict[str, Any]): Dictionary containing data.
  696. Returns:
  697. dict[str, Any]: Dictionary with parameters for transformation.
  698. """
  699. image_shape = params["shape"][:2]
  700. angle = self.py_random.uniform(*self.limit)
  701. # Calculate centers for image and bbox
  702. image_center = fgeometric.center(image_shape)
  703. bbox_center = fgeometric.center_bbox(image_shape)
  704. # Create matrices for image and bbox
  705. matrix, scale = self._create_safe_rotate_matrix(
  706. angle,
  707. image_center,
  708. image_shape,
  709. )
  710. bbox_matrix, _ = self._create_safe_rotate_matrix(
  711. angle,
  712. bbox_center,
  713. image_shape,
  714. )
  715. return {
  716. "rotate": angle,
  717. "scale": scale,
  718. "matrix": matrix,
  719. "bbox_matrix": bbox_matrix,
  720. "output_shape": image_shape,
  721. }