resize.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956
  1. """Transforms for resizing images and associated data.
  2. This module provides transform classes for resizing operations, including uniform resizing,
  3. scaling with aspect ratio preservation, and size-constrained transformations.
  4. """
  5. from __future__ import annotations
  6. from collections.abc import Sequence
  7. from typing import Any, Literal, cast
  8. import cv2
  9. import numpy as np
  10. from albucore import batch_transform
  11. from pydantic import Field, field_validator, model_validator
  12. from typing_extensions import Self
  13. from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
  14. from albumentations.core.type_definitions import ALL_TARGETS
  15. from albumentations.core.utils import to_tuple
  16. from . import functional as fgeometric
# Public names exported by this module. MaxSizeTransform (the shared base class
# of LongestMaxSize and SmallestMaxSize, defined below) is not listed here.
__all__ = ["LongestMaxSize", "RandomScale", "Resize", "SmallestMaxSize"]
  18. class RandomScale(DualTransform):
  19. """Randomly resize the input. Output image size is different from the input image size.
  20. Args:
  21. scale_limit (float or tuple[float, float]): scaling factor range. If scale_limit is a single float value, the
  22. range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
  23. If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
  24. Default: (-0.1, 0.1).
  25. interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
  26. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  27. Default: cv2.INTER_LINEAR.
  28. mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
  29. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  30. Default: cv2.INTER_NEAREST.
  31. area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
  32. for downscaling. Options:
  33. - None: No automatic interpolation selection, always use the specified interpolation method
  34. - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
  35. - "image_mask": Use INTER_AREA when downscaling both images and masks
  36. Default: None.
  37. p (float): probability of applying the transform. Default: 0.5.
  38. Targets:
  39. image, mask, bboxes, keypoints, volume, mask3d
  40. Image types:
  41. uint8, float32
  42. Note:
  43. - The output image size is different from the input image size.
  44. - Scale factor is sampled independently per image side (width and height).
  45. - Bounding box coordinates are scaled accordingly.
  46. - Keypoint coordinates are scaled accordingly.
  47. - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
  48. downscaling (scale < 1.0), which provides better quality for size reduction.
  49. Mathematical formulation:
  50. Let (W, H) be the original image dimensions and (W', H') be the output dimensions.
  51. The scale factor s is sampled from the range [1 + scale_limit[0], 1 + scale_limit[1]].
  52. Then, W' = W * s and H' = H * s.
  53. Examples:
  54. >>> import numpy as np
  55. >>> import albumentations as A
  56. >>> import cv2
  57. >>>
  58. >>> # Create sample data for demonstration
  59. >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
  60. >>> # Add some shapes to visualize scaling effects
  61. >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1) # Red square
  62. >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1) # Green circle
  63. >>>
  64. >>> # Create a mask for segmentation
  65. >>> mask = np.zeros((100, 100), dtype=np.uint8)
  66. >>> mask[25:75, 25:75] = 1 # Mask covering the red square
  67. >>>
  68. >>> # Create bounding boxes and keypoints
  69. >>> bboxes = np.array([[25, 25, 75, 75]]) # Box around the red square
  70. >>> bbox_labels = [1]
  71. >>> keypoints = np.array([[50, 50]]) # Center of circle
  72. >>> keypoint_labels = [0]
  73. >>>
  74. >>> # Apply RandomScale transform with comprehensive parameters
  75. >>> transform = A.Compose([
  76. ... A.RandomScale(
  77. ... scale_limit=(-0.3, 0.5), # Scale between 0.7x and 1.5x
  78. ... interpolation=cv2.INTER_LINEAR,
  79. ... mask_interpolation=cv2.INTER_NEAREST,
  80. ... area_for_downscale="image", # Use INTER_AREA for image downscaling
  81. ... p=1.0 # Always apply
  82. ... )
  83. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  84. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  85. >>>
  86. >>> # Apply the transform to all targets
  87. >>> result = transform(
  88. ... image=image,
  89. ... mask=mask,
  90. ... bboxes=bboxes,
  91. ... bbox_labels=bbox_labels,
  92. ... keypoints=keypoints,
  93. ... keypoint_labels=keypoint_labels
  94. ... )
  95. >>>
  96. >>> # Get the transformed results
  97. >>> scaled_image = result['image'] # Dimensions will be between 70-150 pixels
  98. >>> scaled_mask = result['mask'] # Mask scaled proportionally to image
  99. >>> scaled_bboxes = result['bboxes'] # Bounding boxes adjusted to new dimensions
  100. >>> scaled_bbox_labels = result['bbox_labels'] # Labels remain unchanged
  101. >>> scaled_keypoints = result['keypoints'] # Keypoints adjusted to new dimensions
  102. >>> scaled_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
  103. >>>
  104. >>> # The image dimensions will vary based on the randomly sampled scale factor
  105. >>> # With scale_limit=(-0.3, 0.5), dimensions could be anywhere from 70% to 150% of original
  106. """
  107. _targets = ALL_TARGETS
  108. class InitSchema(BaseTransformInitSchema):
  109. scale_limit: tuple[float, float] | float
  110. area_for_downscale: Literal[None, "image", "image_mask"]
  111. interpolation: Literal[
  112. cv2.INTER_NEAREST,
  113. cv2.INTER_NEAREST_EXACT,
  114. cv2.INTER_LINEAR,
  115. cv2.INTER_CUBIC,
  116. cv2.INTER_AREA,
  117. cv2.INTER_LANCZOS4,
  118. cv2.INTER_LINEAR_EXACT,
  119. ]
  120. mask_interpolation: Literal[
  121. cv2.INTER_NEAREST,
  122. cv2.INTER_NEAREST_EXACT,
  123. cv2.INTER_LINEAR,
  124. cv2.INTER_CUBIC,
  125. cv2.INTER_AREA,
  126. cv2.INTER_LANCZOS4,
  127. cv2.INTER_LINEAR_EXACT,
  128. ]
  129. @field_validator("scale_limit")
  130. @classmethod
  131. def _check_scale_limit(cls, v: tuple[float, float] | float) -> tuple[float, float]:
  132. return to_tuple(v)
  133. def __init__(
  134. self,
  135. scale_limit: tuple[float, float] | float = (-0.1, 0.1),
  136. interpolation: Literal[
  137. cv2.INTER_NEAREST,
  138. cv2.INTER_NEAREST_EXACT,
  139. cv2.INTER_LINEAR,
  140. cv2.INTER_CUBIC,
  141. cv2.INTER_AREA,
  142. cv2.INTER_LANCZOS4,
  143. cv2.INTER_LINEAR_EXACT,
  144. ] = cv2.INTER_LINEAR,
  145. mask_interpolation: Literal[
  146. cv2.INTER_NEAREST,
  147. cv2.INTER_NEAREST_EXACT,
  148. cv2.INTER_LINEAR,
  149. cv2.INTER_CUBIC,
  150. cv2.INTER_AREA,
  151. cv2.INTER_LANCZOS4,
  152. cv2.INTER_LINEAR_EXACT,
  153. ] = cv2.INTER_NEAREST,
  154. area_for_downscale: Literal[None, "image", "image_mask"] = None,
  155. p: float = 0.5,
  156. ):
  157. super().__init__(p=p)
  158. self.scale_limit = cast("tuple[float, float]", scale_limit)
  159. self.interpolation = interpolation
  160. self.mask_interpolation = mask_interpolation
  161. self.area_for_downscale = area_for_downscale
  162. def get_params(self) -> dict[str, float]:
  163. """Get parameters for the transform.
  164. Returns:
  165. dict[str, float]: Dictionary with parameters.
  166. """
  167. return {"scale": self.py_random.uniform(*self.scale_limit) + 1.0}
  168. def apply(
  169. self,
  170. img: np.ndarray,
  171. scale: float,
  172. **params: Any,
  173. ) -> np.ndarray:
  174. """Apply scaling to the image.
  175. Args:
  176. img (np.ndarray): Image to scale.
  177. scale (float): Scaling factor.
  178. **params (Any): Additional parameters.
  179. Returns:
  180. np.ndarray: Scaled image.
  181. """
  182. interpolation = self.interpolation
  183. if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
  184. interpolation = cv2.INTER_AREA
  185. return fgeometric.scale(img, scale, interpolation)
  186. def apply_to_mask(
  187. self,
  188. mask: np.ndarray,
  189. scale: float,
  190. **params: Any,
  191. ) -> np.ndarray:
  192. """Apply scaling to the mask.
  193. Args:
  194. mask (np.ndarray): Mask to scale.
  195. scale (float): Scaling factor.
  196. **params (Any): Additional parameters.
  197. Returns:
  198. np.ndarray: Scaled mask.
  199. """
  200. interpolation = self.mask_interpolation
  201. if self.area_for_downscale == "image_mask" and scale < 1.0:
  202. interpolation = cv2.INTER_AREA
  203. return fgeometric.scale(mask, scale, interpolation)
  204. def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
  205. """Apply the transform to bounding boxes.
  206. Args:
  207. bboxes (np.ndarray): Bounding boxes to transform.
  208. **params (Any): Additional parameters.
  209. Returns:
  210. np.ndarray: Transformed bounding boxes which are scale invariant.
  211. """
  212. # Bounding box coordinates are scale invariant
  213. return bboxes
  214. def apply_to_keypoints(
  215. self,
  216. keypoints: np.ndarray,
  217. scale: float,
  218. **params: Any,
  219. ) -> np.ndarray:
  220. """Apply scaling to keypoints.
  221. Args:
  222. keypoints (np.ndarray): Keypoints to scale.
  223. scale (float): Scaling factor.
  224. **params (Any): Additional parameters.
  225. Returns:
  226. np.ndarray: Scaled keypoints.
  227. """
  228. return fgeometric.keypoints_scale(keypoints, scale, scale)
  229. class MaxSizeTransform(DualTransform):
  230. """Base class for transforms that resize based on maximum size constraints.
  231. This class provides common functionality for derived transforms like LongestMaxSize and
  232. SmallestMaxSize that resize images based on size constraints while preserving aspect ratio.
  233. Args:
  234. max_size (int, Sequence[int], optional): Maximum size constraint. The specific interpretation
  235. depends on the derived class. Default: None.
  236. max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints.
  237. Either max_size or max_size_hw must be specified, but not both. Default: None.
  238. interpolation (OpenCV flag): Flag for the interpolation algorithm. Should be one of:
  239. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  240. Default: cv2.INTER_LINEAR.
  241. mask_interpolation (OpenCV flag): Flag for the mask interpolation algorithm.
  242. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  243. Default: cv2.INTER_NEAREST.
  244. area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
  245. for downscaling. Options:
  246. - None: No automatic interpolation selection, always use the specified interpolation method
  247. - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
  248. - "image_mask": Use INTER_AREA when downscaling both images and masks
  249. Default: None.
  250. p (float): Probability of applying the transform. Default: 1.
  251. Targets:
  252. image, mask, bboxes, keypoints, volume, mask3d
  253. Image types:
  254. uint8, float32
  255. Note:
  256. - This is a base class that should be extended by concrete resize transforms.
  257. - The scaling calculation is implemented in derived classes.
  258. - Aspect ratio is preserved by applying the same scale factor to both dimensions.
  259. - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
  260. downscaling (scale < 1.0), which provides better quality for size reduction.
  261. Examples:
  262. >>> import numpy as np
  263. >>> import albumentations as A
  264. >>> import cv2
  265. >>>
  266. >>> # Example of creating a custom transform that extends MaxSizeTransform
  267. >>> class CustomMaxSize(A.MaxSizeTransform):
  268. ... def get_params_dependent_on_data(self, params, data):
  269. ... img_h, img_w = params["shape"][:2]
  270. ... # Calculate scale factor - here we scale to make the image area constant
  271. ... target_area = 300 * 300 # Target area of 300x300
  272. ... current_area = img_h * img_w
  273. ... scale = np.sqrt(target_area / current_area)
  274. ... return {"scale": scale}
  275. >>>
  276. >>> # Prepare sample data
  277. >>> image = np.zeros((100, 200, 3), dtype=np.uint8)
  278. >>> # Add a rectangle to visualize the effect
  279. >>> cv2.rectangle(image, (50, 20), (150, 80), (255, 0, 0), -1)
  280. >>>
  281. >>> # Create a mask
  282. >>> mask = np.zeros((100, 200), dtype=np.uint8)
  283. >>> mask[20:80, 50:150] = 1
  284. >>>
  285. >>> # Create bounding boxes and keypoints
  286. >>> bboxes = np.array([[50, 20, 150, 80]])
  287. >>> bbox_labels = [1]
  288. >>> keypoints = np.array([[100, 50]])
  289. >>> keypoint_labels = [0]
  290. >>>
  291. >>> # Apply the custom transform
  292. >>> transform = A.Compose([
  293. ... CustomMaxSize(
  294. ... max_size=None,
  295. ... max_size_hw=(None, None), # Not used in our custom implementation
  296. ... interpolation=cv2.INTER_LINEAR,
  297. ... mask_interpolation=cv2.INTER_NEAREST,
  298. ... area_for_downscale="image", # Use INTER_AREA when downscaling images
  299. ... p=1.0
  300. ... )
  301. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  302. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  303. >>>
  304. >>> # Apply the transform
  305. >>> result = transform(
  306. ... image=image,
  307. ... mask=mask,
  308. ... bboxes=bboxes,
  309. ... bbox_labels=bbox_labels,
  310. ... keypoints=keypoints,
  311. ... keypoint_labels=keypoint_labels
  312. ... )
  313. >>>
  314. >>> # Get results
  315. >>> transformed_image = result['image'] # Shape will be approximately (122, 245, 3)
  316. >>> transformed_mask = result['mask'] # Shape will be approximately (122, 245)
  317. >>> transformed_bboxes = result['bboxes'] # Bounding boxes are scale invariant
  318. >>> transformed_keypoints = result['keypoints'] # Keypoints scaled proportionally
  319. >>> transformed_bbox_labels = result['bbox_labels'] # Labels remain unchanged
  320. >>> transformed_keypoint_labels = result['keypoint_labels'] # Labels remain unchanged
  321. """
  322. _targets = ALL_TARGETS
  323. class InitSchema(BaseTransformInitSchema):
  324. max_size: int | list[int] | None
  325. max_size_hw: tuple[int | None, int | None] | None
  326. area_for_downscale: Literal[None, "image", "image_mask"]
  327. interpolation: Literal[
  328. cv2.INTER_NEAREST,
  329. cv2.INTER_NEAREST_EXACT,
  330. cv2.INTER_LINEAR,
  331. cv2.INTER_CUBIC,
  332. cv2.INTER_AREA,
  333. cv2.INTER_LANCZOS4,
  334. cv2.INTER_LINEAR_EXACT,
  335. ]
  336. mask_interpolation: Literal[
  337. cv2.INTER_NEAREST,
  338. cv2.INTER_NEAREST_EXACT,
  339. cv2.INTER_LINEAR,
  340. cv2.INTER_CUBIC,
  341. cv2.INTER_AREA,
  342. cv2.INTER_LANCZOS4,
  343. cv2.INTER_LINEAR_EXACT,
  344. ]
  345. @model_validator(mode="after")
  346. def validate_size_parameters(self) -> Self:
  347. if self.max_size is None and self.max_size_hw is None:
  348. raise ValueError("Either max_size or max_size_hw must be specified")
  349. if self.max_size is not None and self.max_size_hw is not None:
  350. raise ValueError("Only one of max_size or max_size_hw should be specified")
  351. return self
  352. def __init__(
  353. self,
  354. max_size: int | Sequence[int] | None = None,
  355. max_size_hw: tuple[int | None, int | None] | None = None,
  356. interpolation: Literal[
  357. cv2.INTER_NEAREST,
  358. cv2.INTER_NEAREST_EXACT,
  359. cv2.INTER_LINEAR,
  360. cv2.INTER_CUBIC,
  361. cv2.INTER_AREA,
  362. cv2.INTER_LANCZOS4,
  363. cv2.INTER_LINEAR_EXACT,
  364. ] = cv2.INTER_LINEAR,
  365. mask_interpolation: Literal[
  366. cv2.INTER_NEAREST,
  367. cv2.INTER_NEAREST_EXACT,
  368. cv2.INTER_LINEAR,
  369. cv2.INTER_CUBIC,
  370. cv2.INTER_AREA,
  371. cv2.INTER_LANCZOS4,
  372. cv2.INTER_LINEAR_EXACT,
  373. ] = cv2.INTER_NEAREST,
  374. area_for_downscale: Literal[None, "image", "image_mask"] = None,
  375. p: float = 1,
  376. ):
  377. super().__init__(p=p)
  378. self.max_size = max_size
  379. self.max_size_hw = max_size_hw
  380. self.interpolation = interpolation
  381. self.mask_interpolation = mask_interpolation
  382. self.area_for_downscale = area_for_downscale
  383. def apply(
  384. self,
  385. img: np.ndarray,
  386. scale: float,
  387. **params: Any,
  388. ) -> np.ndarray:
  389. height, width = img.shape[:2]
  390. new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
  391. interpolation = self.interpolation
  392. if self.area_for_downscale in ["image", "image_mask"] and scale < 1.0:
  393. interpolation = cv2.INTER_AREA
  394. return fgeometric.resize(img, (new_height, new_width), interpolation=interpolation)
  395. def apply_to_mask(
  396. self,
  397. mask: np.ndarray,
  398. scale: float,
  399. **params: Any,
  400. ) -> np.ndarray:
  401. height, width = mask.shape[:2]
  402. new_height, new_width = max(1, round(height * scale)), max(1, round(width * scale))
  403. interpolation = self.mask_interpolation
  404. if self.area_for_downscale == "image_mask" and scale < 1.0:
  405. interpolation = cv2.INTER_AREA
  406. return fgeometric.resize(mask, (new_height, new_width), interpolation=interpolation)
  407. def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
  408. # Bounding box coordinates are scale invariant
  409. return bboxes
  410. def apply_to_keypoints(
  411. self,
  412. keypoints: np.ndarray,
  413. scale: float,
  414. **params: Any,
  415. ) -> np.ndarray:
  416. return fgeometric.keypoints_scale(keypoints, scale, scale)
  417. @batch_transform("spatial", has_batch_dim=True, has_depth_dim=False)
  418. def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  419. return self.apply(images, *args, **params)
  420. @batch_transform("spatial", has_batch_dim=False, has_depth_dim=True)
  421. def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  422. return self.apply(volume, *args, **params)
  423. @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
  424. def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  425. return self.apply(volumes, *args, **params)
  426. @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
  427. def apply_to_mask3d(self, mask3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  428. return self.apply_to_mask(mask3d, *args, **params)
  429. @batch_transform("spatial", has_batch_dim=True, has_depth_dim=True)
  430. def apply_to_masks3d(self, masks3d: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  431. return self.apply_to_mask(masks3d, *args, **params)
  432. class LongestMaxSize(MaxSizeTransform):
  433. """Rescale an image so that the longest side is equal to max_size or sides meet max_size_hw constraints,
  434. keeping the aspect ratio.
  435. Args:
  436. max_size (int, Sequence[int], optional): Maximum size of the longest side after the transformation.
  437. When using a list or tuple, the max size will be randomly selected from the values provided. Default: None.
  438. max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
  439. - (height, width): Both dimensions must fit within these bounds
  440. - (height, None): Only height is constrained, width scales proportionally
  441. - (None, width): Only width is constrained, height scales proportionally
  442. If specified, max_size must be None. Default: None.
  443. interpolation (OpenCV flag): interpolation method. Default: cv2.INTER_LINEAR.
  444. mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
  445. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  446. Default: cv2.INTER_NEAREST.
  447. area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
  448. for downscaling. Options:
  449. - None: No automatic interpolation selection, always use the specified interpolation method
  450. - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
  451. - "image_mask": Use INTER_AREA when downscaling both images and masks
  452. Default: None.
  453. p (float): probability of applying the transform. Default: 1.
  454. Targets:
  455. image, mask, bboxes, keypoints, volume, mask3d
  456. Image types:
  457. uint8, float32
  458. Note:
  459. - If the longest side of the image is already equal to max_size, the image will not be resized.
  460. - This transform will not crop the image. The resulting image may be smaller than specified in both dimensions.
  461. - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
  462. - Bounding boxes and keypoints are scaled accordingly.
  463. - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
  464. Mathematical Details:
  465. Let (W, H) be the original width and height of the image.
  466. When using max_size:
  467. 1. The scaling factor s is calculated as:
  468. s = max_size / max(W, H)
  469. 2. The new dimensions (W', H') are:
  470. W' = W * s
  471. H' = H * s
  472. When using max_size_hw=(H_target, W_target):
  473. 1. For both dimensions specified:
  474. s = min(H_target/H, W_target/W)
  475. This ensures both dimensions fit within the specified bounds.
  476. 2. For height only (W_target=None):
  477. s = H_target/H
  478. Width will scale proportionally.
  479. 3. For width only (H_target=None):
  480. s = W_target/W
  481. Height will scale proportionally.
  482. 4. The new dimensions (W', H') are:
  483. W' = W * s
  484. H' = H * s
  485. Examples:
  486. >>> import albumentations as A
  487. >>> import cv2
  488. >>> # Using max_size
  489. >>> transform1 = A.LongestMaxSize(max_size=1024, area_for_downscale="image")
  490. >>> # Input image (1500, 800) -> Output (1024, 546)
  491. >>>
  492. >>> # Using max_size_hw with both dimensions
  493. >>> transform2 = A.LongestMaxSize(max_size_hw=(800, 1024), area_for_downscale="image_mask")
  494. >>> # Input (1500, 800) -> Output (800, 427)
  495. >>> # Input (800, 1500) -> Output (546, 1024)
  496. >>>
  497. >>> # Using max_size_hw with only height
  498. >>> transform3 = A.LongestMaxSize(max_size_hw=(800, None))
  499. >>> # Input (1500, 800) -> Output (800, 427)
  500. >>>
  501. >>> # Common use case with padding
  502. >>> transform4 = A.Compose([
  503. ... A.LongestMaxSize(max_size=1024, area_for_downscale="image"),
  504. ... A.PadIfNeeded(min_height=1024, min_width=1024),
  505. ... ])
  506. """
  507. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  508. """Calculate parameters that depend on the input data.
  509. Args:
  510. params (dict[str, Any]): Parameters dictionary.
  511. data (dict[str, Any]): Dictionary containing input data.
  512. Returns:
  513. dict[str, Any]: Dictionary with parameters calculated based on input data.
  514. """
  515. img_h, img_w = params["shape"][:2]
  516. if self.max_size is not None:
  517. if isinstance(self.max_size, (list, tuple)):
  518. max_size = self.py_random.choice(self.max_size)
  519. else:
  520. max_size = self.max_size
  521. scale = max_size / max(img_h, img_w)
  522. elif self.max_size_hw is not None:
  523. # We know max_size_hw is not None here due to model validator
  524. max_h, max_w = self.max_size_hw
  525. if max_h is not None and max_w is not None:
  526. # Scale based on longest side to maintain aspect ratio
  527. h_scale = max_h / img_h
  528. w_scale = max_w / img_w
  529. scale = min(h_scale, w_scale)
  530. elif max_h is not None:
  531. # Only height specified
  532. scale = max_h / img_h
  533. else:
  534. # Only width specified
  535. scale = max_w / img_w
  536. return {"scale": scale}
  537. class SmallestMaxSize(MaxSizeTransform):
  538. """Rescale an image so that minimum side is equal to max_size or sides meet max_size_hw constraints,
  539. keeping the aspect ratio.
  540. Args:
  541. max_size (int, list of int, optional): Maximum size of smallest side of the image after the transformation.
  542. When using a list, max size will be randomly selected from the values in the list. Default: None.
  543. max_size_hw (tuple[int | None, int | None], optional): Maximum (height, width) constraints. Supports:
  544. - (height, width): Both dimensions must be at least these values
  545. - (height, None): Only height is constrained, width scales proportionally
  546. - (None, width): Only width is constrained, height scales proportionally
  547. If specified, max_size must be None. Default: None.
  548. interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
  549. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  550. Default: cv2.INTER_LINEAR.
  551. mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
  552. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  553. Default: cv2.INTER_NEAREST.
  554. area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
  555. for downscaling. Options:
  556. - None: No automatic interpolation selection, always use the specified interpolation method
  557. - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
  558. - "image_mask": Use INTER_AREA when downscaling both images and masks
  559. Default: None.
  560. p (float): Probability of applying the transform. Default: 1.
  561. Targets:
  562. image, mask, bboxes, keypoints, volume, mask3d
  563. Image types:
  564. uint8, float32
  565. Note:
  566. - If the smallest side of the image is already equal to max_size, the image will not be resized.
  567. - This transform will not crop the image. The resulting image may be larger than specified in both dimensions.
  568. - For non-square images, both sides will be scaled proportionally to maintain the aspect ratio.
  569. - Bounding boxes and keypoints are scaled accordingly.
  570. - When area_for_downscale is set, INTER_AREA will be used for downscaling, providing better quality.
  571. Mathematical Details:
  572. Let (W, H) be the original width and height of the image.
  573. When using max_size:
  574. 1. The scaling factor s is calculated as:
  575. s = max_size / min(W, H)
  576. 2. The new dimensions (W', H') are:
  577. W' = W * s
  578. H' = H * s
  579. When using max_size_hw=(H_target, W_target):
  580. 1. For both dimensions specified:
  581. s = max(H_target/H, W_target/W)
  582. This ensures both dimensions are at least as large as specified.
  583. 2. For height only (W_target=None):
  584. s = H_target/H
  585. Width will scale proportionally.
  586. 3. For width only (H_target=None):
  587. s = W_target/W
  588. Height will scale proportionally.
  589. 4. The new dimensions (W', H') are:
  590. W' = W * s
  591. H' = H * s
  592. Examples:
  593. >>> import numpy as np
  594. >>> import albumentations as A
  595. >>> # Using max_size
  596. >>> transform1 = A.SmallestMaxSize(max_size=120, area_for_downscale="image")
  597. >>> # Input image (100, 150) -> Output (120, 180)
  598. >>>
  599. >>> # Using max_size_hw with both dimensions
  600. >>> transform2 = A.SmallestMaxSize(max_size_hw=(100, 200), area_for_downscale="image_mask")
  601. >>> # Input (80, 160) -> Output (100, 200)
  602. >>> # Input (160, 80) -> Output (400, 200)
  603. >>>
  604. >>> # Using max_size_hw with only height
  605. >>> transform3 = A.SmallestMaxSize(max_size_hw=(100, None))
  606. >>> # Input (80, 160) -> Output (100, 200)
  607. """
  def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
      """Compute the uniform scale factor for the current input.

      The factor is derived from the input's height and width (the first two
      entries of ``params["shape"]``) and from whichever size constraint is
      configured on the transform:

      * ``self.max_size`` set: ``scale = max_size / min(height, width)``. When
        ``max_size`` is a list or tuple, one entry is drawn with
        ``self.py_random.choice``.
      * ``self.max_size_hw`` set: ``scale`` is the largest of
        ``target / actual`` over the dimensions whose target is not ``None``.
        With a single dimension given this is simply that dimension's ratio.

      Args:
          params (dict[str, Any]): Parameters dictionary; must contain ``"shape"``.
          data (dict[str, Any]): Dictionary containing input data (not read here).

      Returns:
          dict[str, Any]: ``{"scale": <float>}``.

      Raises:
          ValueError: If neither ``max_size`` nor ``max_size_hw`` is set, or if
              ``max_size_hw`` has ``None`` for both height and width.

      """
      img_h, img_w = params["shape"][:2]

      if self.max_size is not None:
          if isinstance(self.max_size, (list, tuple)):
              max_size = self.py_random.choice(self.max_size)
          else:
              max_size = self.max_size
          return {"scale": max_size / min(img_h, img_w)}

      if self.max_size_hw is None:
          # Previously this path fell through to `return` with `scale` unbound.
          raise ValueError("Either max_size or max_size_hw must be specified")

      max_h, max_w = self.max_size_hw
      # One ratio per requested dimension; taking the largest guarantees every
      # requested dimension ends up at least as large as its target.
      ratios = [
          target / actual
          for target, actual in ((max_h, img_h), (max_w, img_w))
          if target is not None
      ]
      if not ratios:
          # Previously this case attempted `None / img_w` and raised a TypeError.
          raise ValueError("max_size_hw must specify at least one of height or width")

      return {"scale": max(ratios)}
  class Resize(DualTransform):
      """Resize the input to the given height and width.

      Args:
          height (int): desired height of the output.
          width (int): desired width of the output.
          interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
              cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_LINEAR_EXACT,
              cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
              Default: cv2.INTER_LINEAR.
          mask_interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm for mask.
              Should be one of: cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR,
              cv2.INTER_LINEAR_EXACT, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
              Default: cv2.INTER_NEAREST.
          area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
              for downscaling. Options:
              - None: No automatic interpolation selection, always use the specified interpolation method
              - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
              - "image_mask": Use INTER_AREA when downscaling both images and masks
              Default: None.
          p (float): probability of applying the transform. Default: 1.

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Note:
          A resize counts as a downscale when the target height OR the target
          width is smaller than the corresponding input dimension.

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> import cv2
          >>>
          >>> # Create sample data for demonstration
          >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
          >>> # Add some shapes to visualize resize effects
          >>> cv2.rectangle(image, (25, 25), (75, 75), (255, 0, 0), -1)  # Red square
          >>> cv2.circle(image, (50, 50), 10, (0, 255, 0), -1)  # Green circle
          >>>
          >>> # Create a mask for segmentation
          >>> mask = np.zeros((100, 100), dtype=np.uint8)
          >>> mask[25:75, 25:75] = 1  # Mask covering the red square
          >>>
          >>> # Create bounding boxes and keypoints
          >>> bboxes = np.array([[25, 25, 75, 75]])  # Box around the red square
          >>> bbox_labels = [1]
          >>> keypoints = np.array([[50, 50]])  # Center of circle
          >>> keypoint_labels = [0]
          >>>
          >>> # Resize all data to 224x224 (common input size for many CNNs)
          >>> transform = A.Compose([
          ...     A.Resize(
          ...         height=224,
          ...         width=224,
          ...         interpolation=cv2.INTER_LINEAR,
          ...         mask_interpolation=cv2.INTER_NEAREST,
          ...         area_for_downscale="image",  # Use INTER_AREA when downscaling images
          ...         p=1.0
          ...     )
          ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
          ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
          >>>
          >>> # Apply the transform to all targets
          >>> result = transform(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # Get the transformed results
          >>> resized_image = result['image']  # Shape will be (224, 224, 3)
          >>> resized_mask = result['mask']  # Shape will be (224, 224)
          >>> resized_bboxes = result['bboxes']  # Bounding boxes scaled to new dimensions
          >>> resized_bbox_labels = result['bbox_labels']  # Labels remain unchanged
          >>> resized_keypoints = result['keypoints']  # Keypoints scaled to new dimensions
          >>> resized_keypoint_labels = result['keypoint_labels']  # Labels remain unchanged
          >>>
          >>> # Note: When resizing from 100x100 to 224x224:
          >>> # - The red square will be scaled from (25-75) to approximately (56-168)
          >>> # - The keypoint at (50, 50) will move to approximately (112, 112)
          >>> # - All spatial relationships are preserved but coordinates are scaled

      """

      _targets = ALL_TARGETS

      class InitSchema(BaseTransformInitSchema):
          height: int = Field(ge=1)
          width: int = Field(ge=1)
          area_for_downscale: Literal[None, "image", "image_mask"]
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]

      # `area_for_downscale` modes for which each target switches to INTER_AREA.
      _AREA_MODES_IMAGE = ("image", "image_mask")
      _AREA_MODES_MASK = ("image_mask",)

      def __init__(
          self,
          height: int,
          width: int,
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_LINEAR,
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_NEAREST,
          area_for_downscale: Literal[None, "image", "image_mask"] = None,
          p: float = 1,
      ):
          super().__init__(p=p)
          self.height = height
          self.width = width
          self.interpolation = interpolation
          self.mask_interpolation = mask_interpolation
          self.area_for_downscale = area_for_downscale

      def _resolve_interpolation(
          self,
          shape: tuple[int, ...],
          default: int,
          area_modes: tuple[str, ...],
      ) -> int:
          """Pick the interpolation flag for an input of the given shape.

          Args:
              shape (tuple[int, ...]): Shape of the array being resized; only the
                  first two entries (height, width) are used.
              default (int): Flag to use when the INTER_AREA override does not apply.
              area_modes (tuple[str, ...]): Values of ``area_for_downscale`` that
                  enable the override for this target.

          Returns:
              int: ``cv2.INTER_AREA`` when the resize is a downscale and the
              configured ``area_for_downscale`` is in ``area_modes``; otherwise
              ``default``.

          """
          height, width = shape[:2]
          # Shrinking along either axis is treated as a downscale.
          is_downscale = (self.height < height) or (self.width < width)
          if is_downscale and self.area_for_downscale in area_modes:
              return cv2.INTER_AREA
          return default

      def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
          """Apply resizing to the image.

          Args:
              img (np.ndarray): Image to resize.
              **params (Any): Additional parameters.

          Returns:
              np.ndarray: Resized image.

          """
          interpolation = self._resolve_interpolation(img.shape, self.interpolation, self._AREA_MODES_IMAGE)
          return fgeometric.resize(img, (self.height, self.width), interpolation=interpolation)

      def apply_to_mask(self, mask: np.ndarray, **params: Any) -> np.ndarray:
          """Apply resizing to the mask.

          Args:
              mask (np.ndarray): Mask to resize.
              **params (Any): Additional parameters.

          Returns:
              np.ndarray: Resized mask.

          """
          interpolation = self._resolve_interpolation(mask.shape, self.mask_interpolation, self._AREA_MODES_MASK)
          return fgeometric.resize(mask, (self.height, self.width), interpolation=interpolation)

      def apply_to_bboxes(self, bboxes: np.ndarray, **params: Any) -> np.ndarray:
          """Apply the transform to bounding boxes.

          Args:
              bboxes (np.ndarray): Bounding boxes to transform.
              **params (Any): Additional parameters.

          Returns:
              np.ndarray: Transformed bounding boxes which are scale invariant.

          """
          # Bounding box coordinates are scale invariant, so they are returned
          # untouched. NOTE(review): presumably boxes arrive here in a normalized
          # format -- confirm against the bbox processor.
          return bboxes

      def apply_to_keypoints(self, keypoints: np.ndarray, **params: Any) -> np.ndarray:
          """Apply resizing to keypoints.

          Args:
              keypoints (np.ndarray): Keypoints to resize.
              **params (Any): Additional parameters; ``params["shape"]`` supplies
                  the input height and width.

          Returns:
              np.ndarray: Resized keypoints.

          """
          height, width = params["shape"][:2]
          scale_x = self.width / width
          scale_y = self.height / height
          return fgeometric.keypoints_scale(keypoints, scale_x, scale_y)