pad.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. """Padding transformations for images and related data.
  2. This module provides transformations for padding images and associated data. Padding is the process
  3. of adding pixels to the borders of an image to increase its dimensions. Common use cases include:
  4. - Ensuring uniform sizes for model inputs in a batch
  5. - Making image dimensions divisible by specific values (often required by CNNs)
  6. - Creating space around an image for annotations or visual purposes
  7. - Standardizing data dimensions for processing pipelines
  8. Padding transformations in this module support various border modes (constant, reflection, replication)
  9. and properly handle all target types including images, masks, bounding boxes, and keypoints.
  10. """
  11. from __future__ import annotations
  12. from numbers import Real
  13. from typing import Any, Literal
  14. import cv2
  15. import numpy as np
  16. from pydantic import (
  17. Field,
  18. model_validator,
  19. )
  20. from typing_extensions import Self
  21. from albumentations.core.bbox_utils import (
  22. denormalize_bboxes,
  23. normalize_bboxes,
  24. )
  25. from albumentations.core.transforms_interface import (
  26. BaseTransformInitSchema,
  27. DualTransform,
  28. )
  29. from albumentations.core.type_definitions import ALL_TARGETS
  30. from . import functional as fgeometric
  31. __all__ = [
  32. "Pad",
  33. "PadIfNeeded",
  34. ]
  35. NUM_PADS_XY = 2
  36. NUM_PADS_ALL_SIDES = 4
  37. class Pad(DualTransform):
  38. """Pad the sides of an image by specified number of pixels.
  39. Args:
  40. padding (int, tuple[int, int] or tuple[int, int, int, int]): Padding values. Can be:
  41. * int - pad all sides by this value
  42. * tuple[int, int] - (pad_x, pad_y) to pad left/right by pad_x and top/bottom by pad_y
  43. * tuple[int, int, int, int] - (left, top, right, bottom) specific padding per side
  44. fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT
  45. fill_mask (tuple[float, ...] | float): Padding value for mask if border_mode is cv2.BORDER_CONSTANT
  46. border_mode (OpenCV flag): OpenCV border mode
  47. p (float): probability of applying the transform. Default: 1.0.
  48. Targets:
  49. image, mask, bboxes, keypoints, volume, mask3d
  50. Image types:
  51. uint8, float32
  52. References:
  53. PyTorch Pad: https://pytorch.org/vision/main/generated/torchvision.transforms.v2.Pad.html
  54. Examples:
  55. >>> import numpy as np
  56. >>> import albumentations as A
  57. >>> import cv2
  58. >>>
  59. >>> # Prepare sample data
  60. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  61. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  62. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  63. >>> bbox_labels = [1, 2]
  64. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  65. >>> keypoint_labels = [0, 1]
  66. >>>
  67. >>> # Example 1: Pad all sides by the same value
  68. >>> transform = A.Compose([
  69. ... A.Pad(padding=20, border_mode=cv2.BORDER_CONSTANT, fill=0),
  70. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  71. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  72. >>>
  73. >>> # Apply the transform
  74. >>> padded = transform(
  75. ... image=image,
  76. ... mask=mask,
  77. ... bboxes=bboxes,
  78. ... bbox_labels=bbox_labels,
  79. ... keypoints=keypoints,
  80. ... keypoint_labels=keypoint_labels
  81. ... )
  82. >>>
  83. >>> # Get the padded data
  84. >>> padded_image = padded['image'] # Shape will be (140, 140, 3)
  85. >>> padded_mask = padded['mask'] # Shape will be (140, 140)
  86. >>> padded_bboxes = padded['bboxes'] # Bounding boxes coordinates adjusted to the padded image
  87. >>> padded_keypoints = padded['keypoints'] # Keypoints coordinates adjusted to the padded image
  88. >>>
  89. >>> # Example 2: Different padding for sides using (pad_x, pad_y)
  90. >>> transform_xy = A.Compose([
  91. ... A.Pad(
  92. ... padding=(10, 30), # 10px padding on left/right, 30px on top/bottom
  93. ... border_mode=cv2.BORDER_CONSTANT,
  94. ... fill=128 # Gray padding color
  95. ... ),
  96. ... ])
  97. >>>
  98. >>> padded_xy = transform_xy(image=image)
  99. >>> padded_xy_image = padded_xy['image'] # Shape will be (160, 120, 3)
  100. >>>
  101. >>> # Example 3: Different padding for each side
  102. >>> transform_sides = A.Compose([
  103. ... A.Pad(
  104. ... padding=(5, 10, 15, 20), # (left, top, right, bottom)
  105. ... border_mode=cv2.BORDER_CONSTANT,
  106. ... fill=0,
  107. ... fill_mask=0
  108. ... ),
  109. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
  110. >>>
  111. >>> padded_sides = transform_sides(
  112. ... image=image,
  113. ... mask=mask,
  114. ... bboxes=bboxes,
  115. ... bbox_labels=bbox_labels
  116. ... )
  117. >>>
  118. >>> padded_sides_image = padded_sides['image'] # Shape will be (130, 120, 3)
  119. >>> padded_sides_bboxes = padded_sides['bboxes'] # Bounding boxes adjusted to the new coordinates
  120. >>>
  121. >>> # Example 4: Using different border_mode options
  122. >>> # Create a smaller image for better visualization of reflection/wrapping
  123. >>> small_image = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8)
  124. >>>
  125. >>> # Reflection padding
  126. >>> reflect_pad = A.Compose([
  127. ... A.Pad(padding=5, border_mode=cv2.BORDER_REFLECT_101),
  128. ... ])
  129. >>> reflected = reflect_pad(image=small_image)
  130. >>> reflected_image = reflected['image'] # Shape will be (20, 20, 3) with reflected edges
  131. >>>
  132. >>> # Replicate padding
  133. >>> replicate_pad = A.Compose([
  134. ... A.Pad(padding=5, border_mode=cv2.BORDER_REPLICATE),
  135. ... ])
  136. >>> replicated = replicate_pad(image=small_image)
  137. >>> replicated_image = replicated['image'] # Shape will be (20, 20, 3) with replicated edges
  138. >>>
  139. >>> # Example 5: Padding with masks and constant border mode
  140. >>> binary_mask = np.zeros((50, 50), dtype=np.uint8)
  141. >>> binary_mask[10:40, 10:40] = 1 # Set center region to 1
  142. >>>
  143. >>> mask_transform = A.Compose([
  144. ... A.Pad(
  145. ... padding=10,
  146. ... border_mode=cv2.BORDER_CONSTANT,
  147. ... fill=0, # Black padding for image
  148. ... fill_mask=0 # Use 0 for mask padding (background)
  149. ... ),
  150. ... ])
  151. >>>
  152. >>> padded_mask_result = mask_transform(image=image, mask=binary_mask)
  153. >>> padded_binary_mask = padded_mask_result['mask'] # Shape will be (70, 70)
  154. """
  155. _targets = ALL_TARGETS
  156. class InitSchema(BaseTransformInitSchema):
  157. padding: int | tuple[int, int] | tuple[int, int, int, int]
  158. fill: tuple[float, ...] | float
  159. fill_mask: tuple[float, ...] | float
  160. border_mode: Literal[
  161. cv2.BORDER_CONSTANT,
  162. cv2.BORDER_REPLICATE,
  163. cv2.BORDER_REFLECT,
  164. cv2.BORDER_WRAP,
  165. cv2.BORDER_REFLECT_101,
  166. ]
  167. def __init__(
  168. self,
  169. padding: int | tuple[int, int] | tuple[int, int, int, int] = 0,
  170. fill: tuple[float, ...] | float = 0,
  171. fill_mask: tuple[float, ...] | float = 0,
  172. border_mode: Literal[
  173. cv2.BORDER_CONSTANT,
  174. cv2.BORDER_REPLICATE,
  175. cv2.BORDER_REFLECT,
  176. cv2.BORDER_WRAP,
  177. cv2.BORDER_REFLECT_101,
  178. ] = cv2.BORDER_CONSTANT,
  179. p: float = 1.0,
  180. ):
  181. super().__init__(p=p)
  182. self.padding = padding
  183. self.fill = fill
  184. self.fill_mask = fill_mask
  185. self.border_mode = border_mode
  186. def apply(
  187. self,
  188. img: np.ndarray,
  189. pad_top: int,
  190. pad_bottom: int,
  191. pad_left: int,
  192. pad_right: int,
  193. **params: Any,
  194. ) -> np.ndarray:
  195. """Apply the Pad transform to an image.
  196. Args:
  197. img (np.ndarray): Image to be transformed.
  198. pad_top (int): Top padding.
  199. pad_bottom (int): Bottom padding.
  200. pad_left (int): Left padding.
  201. pad_right (int): Right padding.
  202. **params (Any): Additional parameters.
  203. """
  204. return fgeometric.pad_with_params(
  205. img,
  206. pad_top,
  207. pad_bottom,
  208. pad_left,
  209. pad_right,
  210. border_mode=self.border_mode,
  211. value=self.fill,
  212. )
  213. def apply_to_mask(
  214. self,
  215. mask: np.ndarray,
  216. pad_top: int,
  217. pad_bottom: int,
  218. pad_left: int,
  219. pad_right: int,
  220. **params: Any,
  221. ) -> np.ndarray:
  222. """Apply the Pad transform to a mask.
  223. Args:
  224. mask (np.ndarray): Mask to be transformed.
  225. pad_top (int): Top padding.
  226. pad_bottom (int): Bottom padding.
  227. pad_left (int): Left padding.
  228. pad_right (int): Right padding.
  229. **params (Any): Additional parameters.
  230. """
  231. return fgeometric.pad_with_params(
  232. mask,
  233. pad_top,
  234. pad_bottom,
  235. pad_left,
  236. pad_right,
  237. border_mode=self.border_mode,
  238. value=self.fill_mask,
  239. )
  240. def apply_to_bboxes(
  241. self,
  242. bboxes: np.ndarray,
  243. pad_top: int,
  244. pad_bottom: int,
  245. pad_left: int,
  246. pad_right: int,
  247. **params: Any,
  248. ) -> np.ndarray:
  249. """Apply the Pad transform to bounding boxes.
  250. Args:
  251. bboxes (np.ndarray): Bounding boxes to be transformed.
  252. pad_top (int): Top padding.
  253. pad_bottom (int): Bottom padding.
  254. pad_left (int): Left padding.
  255. pad_right (int): Right padding.
  256. **params (Any): Additional parameters.
  257. """
  258. image_shape = params["shape"][:2]
  259. bboxes_np = denormalize_bboxes(bboxes, params["shape"])
  260. result = fgeometric.pad_bboxes(
  261. bboxes_np,
  262. pad_top,
  263. pad_bottom,
  264. pad_left,
  265. pad_right,
  266. self.border_mode,
  267. image_shape=image_shape,
  268. )
  269. rows, cols = params["shape"][:2]
  270. return normalize_bboxes(
  271. result,
  272. (rows + pad_top + pad_bottom, cols + pad_left + pad_right),
  273. )
  274. def apply_to_keypoints(
  275. self,
  276. keypoints: np.ndarray,
  277. pad_top: int,
  278. pad_bottom: int,
  279. pad_left: int,
  280. pad_right: int,
  281. **params: Any,
  282. ) -> np.ndarray:
  283. """Apply the Pad transform to keypoints.
  284. Args:
  285. keypoints (np.ndarray): Keypoints to be transformed.
  286. pad_top (int): Top padding.
  287. pad_bottom (int): Bottom padding.
  288. pad_left (int): Left padding.
  289. pad_right (int): Right padding.
  290. **params (Any): Additional parameters.
  291. """
  292. return fgeometric.pad_keypoints(
  293. keypoints,
  294. pad_top,
  295. pad_bottom,
  296. pad_left,
  297. pad_right,
  298. self.border_mode,
  299. image_shape=params["shape"][:2],
  300. )
  301. def apply_to_images(
  302. self,
  303. images: np.ndarray,
  304. pad_top: int,
  305. pad_bottom: int,
  306. pad_left: int,
  307. pad_right: int,
  308. **params: Any,
  309. ) -> np.ndarray:
  310. """Apply the Pad transform to a batch of images.
  311. Args:
  312. images (np.ndarray): Batch of images to be transformed.
  313. pad_top (int): Top padding.
  314. pad_bottom (int): Bottom padding.
  315. pad_left (int): Left padding.
  316. pad_right (int): Right padding.
  317. **params (Any): Additional parameters.
  318. """
  319. return fgeometric.pad_images_with_params(
  320. images,
  321. pad_top,
  322. pad_bottom,
  323. pad_left,
  324. pad_right,
  325. border_mode=self.border_mode,
  326. value=self.fill,
  327. )
  328. def get_params_dependent_on_data(
  329. self,
  330. params: dict[str, Any],
  331. data: dict[str, Any],
  332. ) -> dict[str, Any]:
  333. """Get the parameters dependent on the data.
  334. Args:
  335. params (dict[str, Any]): Parameters.
  336. data (dict[str, Any]): Data.
  337. Returns:
  338. dict[str, Any]: Parameters.
  339. """
  340. if isinstance(self.padding, Real):
  341. pad_top = pad_bottom = pad_left = pad_right = self.padding
  342. elif isinstance(self.padding, (tuple, list)):
  343. if len(self.padding) == NUM_PADS_XY:
  344. pad_left = pad_right = self.padding[0]
  345. pad_top = pad_bottom = self.padding[1]
  346. elif len(self.padding) == NUM_PADS_ALL_SIDES:
  347. pad_left, pad_top, pad_right, pad_bottom = self.padding # type: ignore[misc]
  348. else:
  349. raise TypeError(
  350. "Padding must be a single number, a pair of numbers, or a quadruple of numbers",
  351. )
  352. else:
  353. raise TypeError(
  354. "Padding must be a single number, a pair of numbers, or a quadruple of numbers",
  355. )
  356. return {
  357. "pad_top": pad_top,
  358. "pad_bottom": pad_bottom,
  359. "pad_left": pad_left,
  360. "pad_right": pad_right,
  361. }
  362. class PadIfNeeded(Pad):
  363. """Pads the sides of an image if the image dimensions are less than the specified minimum dimensions.
  364. If the `pad_height_divisor` or `pad_width_divisor` is specified, the function additionally ensures
  365. that the image dimensions are divisible by these values.
  366. Args:
  367. min_height (int | None): Minimum desired height of the image. Ensures image height is at least this value.
  368. If not specified, pad_height_divisor must be provided.
  369. min_width (int | None): Minimum desired width of the image. Ensures image width is at least this value.
  370. If not specified, pad_width_divisor must be provided.
  371. pad_height_divisor (int | None): If set, pads the image height to make it divisible by this value.
  372. If not specified, min_height must be provided.
  373. pad_width_divisor (int | None): If set, pads the image width to make it divisible by this value.
  374. If not specified, min_width must be provided.
  375. position (Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]):
  376. Position where the image is to be placed after padding. Default is 'center'.
  377. border_mode (int): Specifies the border mode to use if padding is required.
  378. The default is `cv2.BORDER_CONSTANT`.
  379. fill (tuple[float, ...] | float | None): Value to fill the border pixels if the border mode
  380. is `cv2.BORDER_CONSTANT`. Default is None.
  381. fill_mask (tuple[float, ...] | float | None): Similar to `fill` but used for padding masks. Default is None.
  382. p (float): Probability of applying the transform. Default is 1.0.
  383. Targets:
  384. image, mask, bboxes, keypoints, volume, mask3d
  385. Image types:
  386. uint8, float32
  387. Note:
  388. - Either `min_height` or `pad_height_divisor` must be set, but not both.
  389. - Either `min_width` or `pad_width_divisor` must be set, but not both.
  390. - If `border_mode` is set to `cv2.BORDER_CONSTANT`, `value` must be provided.
  391. - The transform will maintain consistency across all targets (image, mask, bboxes, keypoints, volume).
  392. - For bounding boxes, the coordinates will be adjusted to account for the padding.
  393. - For keypoints, their positions will be shifted according to the padding.
  394. Examples:
  395. >>> import numpy as np
  396. >>> import albumentations as A
  397. >>> import cv2
  398. >>>
  399. >>> # Prepare sample data
  400. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  401. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  402. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  403. >>> bbox_labels = [1, 2]
  404. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  405. >>> keypoint_labels = [0, 1]
  406. >>>
  407. >>> # Example 1: Basic usage with min_height and min_width
  408. >>> transform = A.Compose([
  409. ... A.PadIfNeeded(min_height=150, min_width=200, border_mode=cv2.BORDER_CONSTANT, fill=0),
  410. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  411. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  412. >>>
  413. >>> # Apply the transform
  414. >>> padded = transform(
  415. ... image=image,
  416. ... mask=mask,
  417. ... bboxes=bboxes,
  418. ... bbox_labels=bbox_labels,
  419. ... keypoints=keypoints,
  420. ... keypoint_labels=keypoint_labels
  421. ... )
  422. >>>
  423. >>> # Get the padded data
  424. >>> padded_image = padded['image'] # Shape will be (150, 200, 3)
  425. >>> padded_mask = padded['mask'] # Shape will be (150, 200)
  426. >>> padded_bboxes = padded['bboxes'] # Bounding boxes adjusted for the padded image
  427. >>> padded_bbox_labels = padded['bbox_labels'] # Labels remain unchanged
  428. >>> padded_keypoints = padded['keypoints'] # Keypoints adjusted for the padded image
  429. >>> padded_keypoint_labels = padded['keypoint_labels'] # Labels remain unchanged
  430. >>>
  431. >>> # Example 2: Using pad_height_divisor and pad_width_divisor
  432. >>> # This ensures the output dimensions are divisible by the specified values
  433. >>> transform_divisor = A.Compose([
  434. ... A.PadIfNeeded(
  435. ... pad_height_divisor=32,
  436. ... pad_width_divisor=32,
  437. ... border_mode=cv2.BORDER_CONSTANT,
  438. ... fill=0
  439. ... ),
  440. ... ])
  441. >>>
  442. >>> padded_divisor = transform_divisor(image=image)
  443. >>> padded_divisor_image = padded_divisor['image'] # Shape will be (128, 128, 3) - divisible by 32
  444. >>>
  445. >>> # Example 3: Different position options
  446. >>> # Create a small recognizable image for better visualization of positioning
  447. >>> small_image = np.zeros((50, 50, 3), dtype=np.uint8)
  448. >>> small_image[20:30, 20:30, :] = 255 # White square in the middle
  449. >>>
  450. >>> # Top-left positioning
  451. >>> top_left_pad = A.Compose([
  452. ... A.PadIfNeeded(
  453. ... min_height=100,
  454. ... min_width=100,
  455. ... position="top_left",
  456. ... border_mode=cv2.BORDER_CONSTANT,
  457. ... fill=128 # Gray padding
  458. ... ),
  459. ... ])
  460. >>> top_left_result = top_left_pad(image=small_image)
  461. >>> top_left_image = top_left_result['image'] # Image will be at top-left of 100x100 canvas
  462. >>>
  463. >>> # Center positioning (default)
  464. >>> center_pad = A.Compose([
  465. ... A.PadIfNeeded(
  466. ... min_height=100,
  467. ... min_width=100,
  468. ... position="center",
  469. ... border_mode=cv2.BORDER_CONSTANT,
  470. ... fill=128
  471. ... ),
  472. ... ])
  473. >>> center_result = center_pad(image=small_image)
  474. >>> center_image = center_result['image'] # Image will be centered in 100x100 canvas
  475. >>>
  476. >>> # Example 4: Different border_mode options
  477. >>> # Reflection padding
  478. >>> reflect_pad = A.Compose([
  479. ... A.PadIfNeeded(
  480. ... min_height=100,
  481. ... min_width=100,
  482. ... border_mode=cv2.BORDER_REFLECT_101
  483. ... ),
  484. ... ])
  485. >>> reflected = reflect_pad(image=small_image)
  486. >>> reflected_image = reflected['image'] # Will use reflection for padding
  487. >>>
  488. >>> # Replication padding
  489. >>> replicate_pad = A.Compose([
  490. ... A.PadIfNeeded(
  491. ... min_height=100,
  492. ... min_width=100,
  493. ... border_mode=cv2.BORDER_REPLICATE
  494. ... ),
  495. ... ])
  496. >>> replicated = replicate_pad(image=small_image)
  497. >>> replicated_image = replicated['image'] # Will use edge replication for padding
  498. >>>
  499. >>> # Example 5: Working with masks and custom fill values
  500. >>> binary_mask = np.zeros((50, 50), dtype=np.uint8)
  501. >>> binary_mask[10:40, 10:40] = 1 # Set center region to 1
  502. >>>
  503. >>> mask_transform = A.Compose([
  504. ... A.PadIfNeeded(
  505. ... min_height=100,
  506. ... min_width=100,
  507. ... border_mode=cv2.BORDER_CONSTANT,
  508. ... fill=0, # Black padding for image
  509. ... fill_mask=0 # Use 0 for mask padding (background)
  510. ... ),
  511. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
  512. >>>
  513. >>> padded_mask_result = mask_transform(
  514. ... image=image,
  515. ... mask=binary_mask,
  516. ... bboxes=bboxes,
  517. ... bbox_labels=bbox_labels
  518. ... )
  519. >>> padded_binary_mask = padded_mask_result['mask'] # Shape will be (100, 100)
  520. >>> padded_result_bboxes = padded_mask_result['bboxes'] # Adjusted for padding
  521. >>> padded_result_bbox_labels = padded_mask_result['bbox_labels'] # Labels remain unchanged
  522. """
  523. class InitSchema(BaseTransformInitSchema):
  524. min_height: int | None = Field(ge=1)
  525. min_width: int | None = Field(ge=1)
  526. pad_height_divisor: int | None = Field(ge=1)
  527. pad_width_divisor: int | None = Field(ge=1)
  528. position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]
  529. border_mode: Literal[
  530. cv2.BORDER_CONSTANT,
  531. cv2.BORDER_REPLICATE,
  532. cv2.BORDER_REFLECT,
  533. cv2.BORDER_WRAP,
  534. cv2.BORDER_REFLECT_101,
  535. ]
  536. fill: tuple[float, ...] | float
  537. fill_mask: tuple[float, ...] | float
  538. @model_validator(mode="after")
  539. def _validate_divisibility(self) -> Self:
  540. if (self.min_height is None) == (self.pad_height_divisor is None):
  541. msg = "Only one of 'min_height' and 'pad_height_divisor' parameters must be set"
  542. raise ValueError(msg)
  543. if (self.min_width is None) == (self.pad_width_divisor is None):
  544. msg = "Only one of 'min_width' and 'pad_width_divisor' parameters must be set"
  545. raise ValueError(msg)
  546. if self.border_mode == cv2.BORDER_CONSTANT and self.fill is None:
  547. msg = "If 'border_mode' is set to 'BORDER_CONSTANT', 'fill' must be provided."
  548. raise ValueError(msg)
  549. return self
  550. def __init__(
  551. self,
  552. min_height: int | None = 1024,
  553. min_width: int | None = 1024,
  554. pad_height_divisor: int | None = None,
  555. pad_width_divisor: int | None = None,
  556. position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
  557. border_mode: Literal[
  558. cv2.BORDER_CONSTANT,
  559. cv2.BORDER_REPLICATE,
  560. cv2.BORDER_REFLECT,
  561. cv2.BORDER_WRAP,
  562. cv2.BORDER_REFLECT_101,
  563. ] = cv2.BORDER_CONSTANT,
  564. fill: tuple[float, ...] | float = 0,
  565. fill_mask: tuple[float, ...] | float = 0,
  566. p: float = 1.0,
  567. ):
  568. # Initialize with dummy padding that will be calculated later
  569. super().__init__(
  570. padding=0,
  571. fill=fill,
  572. fill_mask=fill_mask,
  573. border_mode=border_mode,
  574. p=p,
  575. )
  576. self.min_height = min_height
  577. self.min_width = min_width
  578. self.pad_height_divisor = pad_height_divisor
  579. self.pad_width_divisor = pad_width_divisor
  580. self.position = position
  581. def get_params_dependent_on_data(
  582. self,
  583. params: dict[str, Any],
  584. data: dict[str, Any],
  585. ) -> dict[str, Any]:
  586. """Get the parameters dependent on the data.
  587. Args:
  588. params (dict[str, Any]): Parameters.
  589. data (dict[str, Any]): Data.
  590. Returns:
  591. dict[str, Any]: Parameters.
  592. """
  593. h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.get_padding_params(
  594. image_shape=params["shape"][:2],
  595. min_height=self.min_height,
  596. min_width=self.min_width,
  597. pad_height_divisor=self.pad_height_divisor,
  598. pad_width_divisor=self.pad_width_divisor,
  599. )
  600. h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.adjust_padding_by_position(
  601. h_top=h_pad_top,
  602. h_bottom=h_pad_bottom,
  603. w_left=w_pad_left,
  604. w_right=w_pad_right,
  605. position=self.position,
  606. py_random=self.py_random,
  607. )
  608. return {
  609. "pad_top": h_pad_top,
  610. "pad_bottom": h_pad_bottom,
  611. "pad_left": w_pad_left,
  612. "pad_right": w_pad_right,
  613. }