functional.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017
  1. """Functional implementations of dropout operations for image augmentation.
  2. This module provides low-level functions for various dropout techniques used in image
  3. augmentation, including channel dropout, grid dropout, mask dropout, and coarse dropout.
  4. These functions create and apply dropout patterns to images, masks, bounding boxes, and
  5. keypoints, with support for different filling methods and hole generation strategies.
  6. """
  7. from __future__ import annotations
  8. from typing import Literal, cast
  9. import cv2
  10. import numpy as np
  11. from albucore import (
  12. MAX_VALUES_BY_DTYPE,
  13. NUM_MULTI_CHANNEL_DIMENSIONS,
  14. get_num_channels,
  15. is_grayscale_image,
  16. preserve_channel_dim,
  17. uint8_io,
  18. )
  19. from albumentations.augmentations.geometric.functional import split_uniform_grid
  20. from albumentations.augmentations.utils import handle_empty_array
  21. from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS
# Names exported by a star-import of this module. Other helpers defined in this
# file (for example ``apply_inpainting`` or ``cutout_on_volume``) are not listed
# here and therefore have to be imported by name.
__all__ = [
    "calculate_grid_dimensions",
    "channel_dropout",
    "cutout",
    "filter_bboxes_by_holes",
    "filter_keypoints_in_holes",
    "generate_grid_holes",
    "generate_random_fill",
]
  31. @preserve_channel_dim
  32. def channel_dropout(
  33. img: np.ndarray,
  34. channels_to_drop: int | tuple[int, ...] | np.ndarray,
  35. fill: tuple[float, ...] | float = 0,
  36. ) -> np.ndarray:
  37. """Drop channels from an image.
  38. This function drops channels from an image.
  39. Args:
  40. img (np.ndarray): Input image.
  41. channels_to_drop (int | tuple[int, ...] | np.ndarray): Channels to drop.
  42. fill (tuple[float, ...] | float): Value to fill the dropped channels with.
  43. Returns:
  44. np.ndarray: Image with channels dropped.
  45. """
  46. if is_grayscale_image(img):
  47. msg = "Only one channel. ChannelDropout is not defined."
  48. raise NotImplementedError(msg)
  49. img = img.copy()
  50. img[..., channels_to_drop] = fill
  51. return img
  52. def generate_random_fill(
  53. dtype: np.dtype,
  54. shape: tuple[int, ...],
  55. random_generator: np.random.Generator,
  56. ) -> np.ndarray:
  57. """Generate a random fill array based on the given dtype and target shape.
  58. This function creates a numpy array filled with random values. The range and type of these values
  59. depend on the input dtype. For integer dtypes, it generates random integers. For floating-point
  60. dtypes, it generates random floats.
  61. Args:
  62. dtype (np.dtype): The data type of the array to be generated.
  63. shape (tuple[int, ...]): The shape of the array to be generated.
  64. random_generator (np.random.Generator): The random generator to use for generating values.
  65. If None, the default numpy random generator is used.
  66. Returns:
  67. np.ndarray: A numpy array of the specified shape and dtype, filled with random values.
  68. Raises:
  69. ValueError: If the input dtype is neither integer nor floating-point.
  70. Examples:
  71. >>> import numpy as np
  72. >>> random_state = np.random.RandomState(42)
  73. >>> result = generate_random_fill(np.dtype('uint8'), (2, 2), random_state)
  74. >>> print(result)
  75. [[172 251]
  76. [ 80 141]]
  77. """
  78. max_value = MAX_VALUES_BY_DTYPE[dtype]
  79. if np.issubdtype(dtype, np.integer):
  80. return random_generator.integers(0, max_value + 1, size=shape, dtype=dtype)
  81. if np.issubdtype(dtype, np.floating):
  82. return random_generator.uniform(0, max_value, size=shape).astype(dtype)
  83. raise ValueError(f"Unsupported dtype: {dtype}")
  84. @uint8_io
  85. def apply_inpainting(img: np.ndarray, holes: np.ndarray, method: Literal["inpaint_telea", "inpaint_ns"]) -> np.ndarray:
  86. """Apply OpenCV inpainting to fill the holes in the image.
  87. Args:
  88. img (np.ndarray): Input image (grayscale or BGR)
  89. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  90. method (Literal["inpaint_telea", "inpaint_ns"]): Inpainting method to use
  91. Returns:
  92. np.ndarray: Inpainted image
  93. Raises:
  94. NotImplementedError: If image has more than 3 channels
  95. """
  96. num_channels = get_num_channels(img)
  97. # Create inpainting mask
  98. mask = np.zeros(img.shape[:2], dtype=np.uint8)
  99. for x_min, y_min, x_max, y_max in holes:
  100. mask[y_min:y_max, x_min:x_max] = 255
  101. inpaint_method = cv2.INPAINT_TELEA if method == "inpaint_telea" else cv2.INPAINT_NS
  102. # Handle grayscale images by converting to 3 channels and back
  103. if num_channels == 1:
  104. if img.ndim == NUM_MULTI_CHANNEL_DIMENSIONS:
  105. img = img.squeeze()
  106. img_3ch = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  107. result = cv2.inpaint(img_3ch, mask, 3, inpaint_method)
  108. return (
  109. cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)[..., None]
  110. if num_channels == NUM_MULTI_CHANNEL_DIMENSIONS
  111. else cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
  112. )
  113. return cv2.inpaint(img, mask, 3, inpaint_method)
  114. def fill_holes_with_value(img: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
  115. """Fill holes with a constant value.
  116. Args:
  117. img (np.ndarray): Input image
  118. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  119. fill (np.ndarray): Value to fill the holes with
  120. """
  121. for x_min, y_min, x_max, y_max in holes:
  122. img[y_min:y_max, x_min:x_max] = fill
  123. return img
  124. def fill_volume_holes_with_value(volume: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
  125. """Fill holes in a volume with a constant value.
  126. Args:
  127. volume (np.ndarray): Input volume
  128. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  129. fill (np.ndarray): Value to fill the holes with
  130. """
  131. for x_min, y_min, x_max, y_max in holes:
  132. volume[:, y_min:y_max, x_min:x_max] = fill
  133. return volume
  134. def fill_volumes_holes_with_value(volumes: np.ndarray, holes: np.ndarray, fill: np.ndarray) -> np.ndarray:
  135. """Fill holes in a batch of volumes with a constant value.
  136. Args:
  137. volumes (np.ndarray): Input batch of volumes
  138. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  139. fill (np.ndarray): Value to fill the holes with
  140. """
  141. for x_min, y_min, x_max, y_max in holes:
  142. volumes[:, :, y_min:y_max, x_min:x_max] = fill
  143. return volumes
  144. def fill_holes_with_random(
  145. img: np.ndarray,
  146. holes: np.ndarray,
  147. random_generator: np.random.Generator,
  148. uniform: bool,
  149. ) -> np.ndarray:
  150. """Fill holes with random values.
  151. Args:
  152. img (np.ndarray): Input image
  153. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  154. random_generator (np.random.Generator): Random number generator
  155. uniform (bool): If True, use same random value for entire hole
  156. """
  157. for x_min, y_min, x_max, y_max in holes:
  158. shape = (1,) if uniform else (y_max - y_min, x_max - x_min)
  159. if img.ndim != MONO_CHANNEL_DIMENSIONS:
  160. shape = (1, img.shape[2]) if uniform else (*shape, img.shape[2])
  161. random_fill = generate_random_fill(img.dtype, shape, random_generator)
  162. img[y_min:y_max, x_min:x_max] = random_fill
  163. return img
  164. def fill_volume_holes_with_random(
  165. volume: np.ndarray,
  166. holes: np.ndarray,
  167. random_generator: np.random.Generator,
  168. uniform: bool,
  169. ) -> np.ndarray:
  170. """Fill holes in a volume with random values.
  171. Args:
  172. volume (np.ndarray): Input volume of shape (D, H, W, C) or (D, H, W)
  173. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  174. random_generator (np.random.Generator): Random number generator
  175. uniform (bool): If True, use same random value for entire hole in each image.
  176. """
  177. for x_min, y_min, x_max, y_max in holes:
  178. shape = (volume.shape[0], 1, 1) if uniform else (volume.shape[0], y_max - y_min, x_max - x_min)
  179. if volume.ndim != 3:
  180. shape = (volume.shape[0], 1, 1, volume.shape[3]) if uniform else (*shape, volume.shape[3])
  181. random_fill = generate_random_fill(volume.dtype, shape, random_generator)
  182. volume[:, y_min:y_max, x_min:x_max] = random_fill
  183. return volume
  184. def fill_volumes_holes_with_random(
  185. volumes: np.ndarray,
  186. holes: np.ndarray,
  187. random_generator: np.random.Generator,
  188. uniform: bool,
  189. ) -> np.ndarray:
  190. """Fill holes in a batch of volumes with random values.
  191. Args:
  192. volumes (np.ndarray): Input volume of shape (N, D, H, W, C) or (N, D, H, W)
  193. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  194. random_generator (np.random.Generator): Random number generator
  195. uniform (bool): If True, use same random value for entire hole for each image
  196. """
  197. for x_min, y_min, x_max, y_max in holes:
  198. shape = (
  199. (volumes.shape[0], volumes.shape[1], 1, 1)
  200. if uniform
  201. else (volumes.shape[0], volumes.shape[1], y_max - y_min, x_max - x_min)
  202. )
  203. if volumes.ndim != 4:
  204. shape = (
  205. (volumes.shape[0], volumes.shape[1], 1, 1, volumes.shape[4]) if uniform else (*shape, volumes.shape[4])
  206. )
  207. random_fill = generate_random_fill(volumes.dtype, shape, random_generator)
  208. volumes[:, :, y_min:y_max, x_min:x_max] = random_fill
  209. return volumes
  210. def cutout(
  211. img: np.ndarray,
  212. holes: np.ndarray,
  213. fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
  214. random_generator: np.random.Generator,
  215. ) -> np.ndarray:
  216. """Apply cutout augmentation to the image by cutting out holes and filling them.
  217. Args:
  218. img (np.ndarray): The image to augment
  219. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  220. fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
  221. Value to fill holes with. Can be:
  222. - number (int/float): Will be broadcast to all channels
  223. - sequence (tuple/list/ndarray): Must match number of channels
  224. - "random": Different random values for each pixel
  225. - "random_uniform": Same random value for entire hole
  226. - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods
  227. random_generator (np.random.Generator): Random number generator for random fills
  228. Raises:
  229. ValueError: If fill length doesn't match number of channels
  230. """
  231. img = img.copy()
  232. # Handle inpainting methods
  233. if isinstance(fill, str):
  234. if fill in {"inpaint_telea", "inpaint_ns"}:
  235. return apply_inpainting(img, holes, cast("Literal['inpaint_telea', 'inpaint_ns']", fill))
  236. if fill == "random":
  237. return fill_holes_with_random(img, holes, random_generator, uniform=False)
  238. if fill == "random_uniform":
  239. return fill_holes_with_random(img, holes, random_generator, uniform=True)
  240. raise ValueError(f"Unsupported string fill: {fill}")
  241. # Convert numeric fill values to numpy array
  242. if isinstance(fill, (int, float)):
  243. fill_array = np.array(fill, dtype=img.dtype)
  244. return fill_holes_with_value(img, holes, fill_array)
  245. # Handle sequence fill values
  246. fill_array = np.array(fill, dtype=img.dtype)
  247. # For multi-channel images, verify fill matches number of channels
  248. if img.ndim == NUM_MULTI_CHANNEL_DIMENSIONS:
  249. fill_array = fill_array.ravel()
  250. if fill_array.size != img.shape[2]:
  251. raise ValueError(
  252. f"Fill value must have same number of channels as image. "
  253. f"Got {fill_array.size}, expected {img.shape[2]}",
  254. )
  255. return fill_holes_with_value(img, holes, fill_array)
  256. def cutout_on_volume(
  257. volume: np.ndarray,
  258. holes: np.ndarray,
  259. fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
  260. random_generator: np.random.Generator,
  261. ) -> np.ndarray:
  262. """Apply cutout augmentation to a volume of shape (D, H, W) or (D, H, W, C) by cutting out holes and filling them.
  263. Args:
  264. volume (np.ndarray): The volume to augment
  265. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  266. fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
  267. Value to fill holes with. Can be:
  268. - number (int/float): Will be broadcast to all channels
  269. - sequence (tuple/list/ndarray): Must match number of channels
  270. - "random": Different random values for each pixel
  271. - "random_uniform": Same random value for entire hole, different values across images
  272. - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods
  273. random_generator (np.random.Generator): Random number generator for random fills
  274. Raises:
  275. ValueError: If fill length doesn't match number of channels
  276. """
  277. volume = volume.copy()
  278. # Handle inpainting methods
  279. if isinstance(fill, str):
  280. if fill in {"inpaint_telea", "inpaint_ns"}:
  281. processed_images = [
  282. apply_inpainting(img, holes, cast("Literal['inpaint_telea', 'inpaint_ns']", fill)) for img in volume
  283. ]
  284. result = np.array(processed_images)
  285. # Reshape to original volume shape: (D, H, W, C) or (D, H, W)
  286. return result.reshape(volume.shape)
  287. if fill == "random":
  288. return fill_volume_holes_with_random(volume, holes, random_generator, uniform=False)
  289. if fill == "random_uniform":
  290. return fill_volume_holes_with_random(volume, holes, random_generator, uniform=True)
  291. raise ValueError(f"Unsupported string fill: {fill}")
  292. # Convert numeric fill values to numpy array
  293. if isinstance(fill, (int, float)):
  294. fill_array = np.array(fill, dtype=volume.dtype)
  295. return fill_volume_holes_with_value(volume, holes, fill_array)
  296. # Handle sequence fill values
  297. fill_array = np.array(fill, dtype=volume.dtype)
  298. # For multi-channel images, verify fill matches number of channels
  299. if volume.ndim == 4:
  300. fill_array = fill_array.ravel()
  301. if fill_array.size != volume.shape[3]:
  302. raise ValueError(
  303. f"Fill value must have same number of channels as image. "
  304. f"Got {fill_array.size}, expected {volume.shape[3]}",
  305. )
  306. return fill_volume_holes_with_value(volume, holes, fill_array)
  307. def cutout_on_volumes(
  308. volumes: np.ndarray,
  309. holes: np.ndarray,
  310. fill: tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"],
  311. random_generator: np.random.Generator,
  312. ) -> np.ndarray:
  313. """Apply cutout augmentation to a batch of volumes of shape (N, D, H, W) or (N, D, H, W, C)
  314. Args:
  315. volumes (np.ndarray): The image to augment
  316. holes (np.ndarray): Array of [x1, y1, x2, y2] coordinates
  317. fill (tuple[float, ...] | float | Literal["random", "random_uniform", "inpaint_telea", "inpaint_ns"]):
  318. Value to fill holes with. Can be:
  319. - number (int/float): Will be broadcast to all channels
  320. - sequence (tuple/list/ndarray): Must match number of channels
  321. - "random": Different random values for each pixel
  322. - "random_uniform": Same random value for entire hole, different values across images
  323. - "inpaint_telea"/"inpaint_ns": OpenCV inpainting methods
  324. random_generator (np.random.Generator): Random number generator for random fills
  325. Raises:
  326. ValueError: If fill length doesn't match number of channels
  327. """
  328. volumes = volumes.copy()
  329. # Handle inpainting methods
  330. if isinstance(fill, str):
  331. if fill in {"inpaint_telea", "inpaint_ns"}:
  332. processed_images = [
  333. apply_inpainting(img, holes, cast("Literal['inpaint_telea', 'inpaint_ns']", fill))
  334. for volume in volumes
  335. for img in volume
  336. ]
  337. result = np.array(processed_images)
  338. # Reshape to original batch of volumes shape: (N, D, H, W, C) or (N, D, H, W)
  339. return result.reshape(volumes.shape)
  340. if fill == "random":
  341. return fill_volumes_holes_with_random(volumes, holes, random_generator, uniform=False)
  342. if fill == "random_uniform":
  343. return fill_volumes_holes_with_random(volumes, holes, random_generator, uniform=True)
  344. raise ValueError(f"Unsupported string fill: {fill}")
  345. # Convert numeric fill values to numpy array
  346. if isinstance(fill, (int, float)):
  347. fill_array = np.array(fill, dtype=volumes.dtype)
  348. return fill_volumes_holes_with_value(volumes, holes, fill_array)
  349. # Handle sequence fill values
  350. fill_array = np.array(fill, dtype=volumes.dtype)
  351. # For multi-channel images, verify fill matches number of channels
  352. if volumes.ndim == 5:
  353. fill_array = fill_array.ravel()
  354. if fill_array.size != volumes.shape[4]:
  355. raise ValueError(
  356. f"Fill value must have same number of channels as image. "
  357. f"Got {fill_array.size}, expected {volumes.shape[4]}",
  358. )
  359. return fill_volumes_holes_with_value(volumes, holes, fill_array)
  360. @handle_empty_array("keypoints")
  361. def filter_keypoints_in_holes(keypoints: np.ndarray, holes: np.ndarray) -> np.ndarray:
  362. """Filter out keypoints that are inside any of the holes.
  363. Args:
  364. keypoints (np.ndarray): Array of keypoints with shape (num_keypoints, 2+).
  365. The first two columns are x and y coordinates.
  366. holes (np.ndarray): Array of holes with shape (num_holes, 4).
  367. Each hole is represented as [x1, y1, x2, y2].
  368. Returns:
  369. np.ndarray: Array of keypoints that are not inside any hole.
  370. """
  371. # Broadcast keypoints and holes for vectorized comparison
  372. kp_x = keypoints[:, 0][:, np.newaxis] # Shape: (num_keypoints, 1)
  373. kp_y = keypoints[:, 1][:, np.newaxis] # Shape: (num_keypoints, 1)
  374. hole_x1 = holes[:, 0] # Shape: (num_holes,)
  375. hole_y1 = holes[:, 1] # Shape: (num_holes,)
  376. hole_x2 = holes[:, 2] # Shape: (num_holes,)
  377. hole_y2 = holes[:, 3] # Shape: (num_holes,)
  378. # Check if each keypoint is inside each hole
  379. inside_hole = (kp_x >= hole_x1) & (kp_x < hole_x2) & (kp_y >= hole_y1) & (kp_y < hole_y2)
  380. # A keypoint is valid if it's not inside any hole
  381. valid_keypoints = ~np.any(inside_hole, axis=1)
  382. return keypoints[valid_keypoints]
  383. @handle_empty_array("bboxes")
  384. def resize_boxes_to_visible_area(
  385. boxes: np.ndarray,
  386. hole_mask: np.ndarray,
  387. ) -> np.ndarray:
  388. """Resize boxes to their largest visible rectangular regions."""
  389. # Extract box coordinates
  390. x1 = boxes[:, 0].astype(int)
  391. y1 = boxes[:, 1].astype(int)
  392. x2 = boxes[:, 2].astype(int)
  393. y2 = boxes[:, 3].astype(int)
  394. # Process each box individually to avoid array shape issues
  395. new_boxes: list[np.ndarray] = []
  396. regions = [hole_mask[y1[i] : y2[i], x1[i] : x2[i]] for i in range(len(boxes))]
  397. visible_areas = [1 - region for region in regions]
  398. for i, (visible, box) in enumerate(zip(visible_areas, boxes)):
  399. if not visible.any():
  400. continue
  401. # Find visible coordinates
  402. y_visible = visible.any(axis=1)
  403. x_visible = visible.any(axis=0)
  404. y_coords = np.nonzero(y_visible)[0]
  405. x_coords = np.nonzero(x_visible)[0]
  406. # Update only the coordinate part of the box
  407. new_box = box.copy()
  408. new_box[0] = x1[i] + x_coords[0] # x_min
  409. new_box[1] = y1[i] + y_coords[0] # y_min
  410. new_box[2] = x1[i] + x_coords[-1] + 1 # x_max
  411. new_box[3] = y1[i] + y_coords[-1] + 1 # y_max
  412. new_boxes.append(new_box)
  413. # Return empty array with correct shape if all boxes were removed
  414. return np.array(new_boxes) if new_boxes else np.zeros((0, boxes.shape[1]), dtype=boxes.dtype)
  415. def filter_bboxes_by_holes(
  416. bboxes: np.ndarray,
  417. holes: np.ndarray,
  418. image_shape: tuple[int, int],
  419. min_area: float,
  420. min_visibility: float,
  421. ) -> np.ndarray:
  422. """Filter bounding boxes by holes.
  423. This function filters bounding boxes by holes.
  424. Args:
  425. bboxes (np.ndarray): Array of bounding boxes.
  426. holes (np.ndarray): Array of holes.
  427. image_shape (tuple[int, int]): Shape of the image.
  428. min_area (float): Minimum area of a bounding box.
  429. min_visibility (float): Minimum visibility of a bounding box.
  430. Returns:
  431. np.ndarray: Filtered bounding boxes.
  432. """
  433. if len(bboxes) == 0 or len(holes) == 0:
  434. return bboxes
  435. # Create hole mask
  436. hole_mask = np.zeros(image_shape, dtype=np.uint8)
  437. for hole in holes:
  438. x_min, y_min, x_max, y_max = hole.astype(int)
  439. hole_mask[y_min:y_max, x_min:x_max] = 1
  440. # Filter boxes by area and visibility
  441. bboxes_int = bboxes.astype(int)
  442. box_areas = (bboxes_int[:, 2] - bboxes_int[:, 0]) * (bboxes_int[:, 3] - bboxes_int[:, 1])
  443. intersection_areas = np.array([np.sum(hole_mask[y:y2, x:x2]) for x, y, x2, y2 in bboxes_int[:, :4]])
  444. remaining_areas = box_areas - intersection_areas
  445. visibility_ratios = remaining_areas / box_areas
  446. mask = (remaining_areas >= min_area) & (visibility_ratios >= min_visibility) & (remaining_areas > 0)
  447. valid_boxes = bboxes[mask]
  448. if len(valid_boxes) == 0:
  449. return np.empty((0, bboxes.shape[1]))
  450. # Try to resize valid boxes
  451. return resize_boxes_to_visible_area(valid_boxes, hole_mask)
  452. def calculate_grid_dimensions(
  453. image_shape: tuple[int, int],
  454. unit_size_range: tuple[int, int] | None,
  455. holes_number_xy: tuple[int, int] | None,
  456. random_generator: np.random.Generator,
  457. ) -> tuple[int, int]:
  458. """Calculate the dimensions of grid units for GridDropout.
  459. This function determines the size of grid units based on the input parameters.
  460. It supports three modes of operation:
  461. 1. Using a range of unit sizes
  462. 2. Using a specified number of holes in x and y directions
  463. 3. Falling back to a default calculation
  464. Args:
  465. image_shape (tuple[int, int]): The shape of the image as (height, width).
  466. unit_size_range (tuple[int, int] | None, optional): A range of possible unit sizes.
  467. If provided, a random size within this range will be chosen for both height and width.
  468. holes_number_xy (tuple[int, int] | None, optional): The number of holes in the x and y directions.
  469. If provided, the grid dimensions will be calculated to fit this number of holes.
  470. random_generator (np.random.Generator): The random generator to use for generating random values.
  471. Returns:
  472. tuple[int, int]: The calculated grid unit dimensions as (unit_height, unit_width).
  473. Raises:
  474. ValueError: If the upper limit of unit_size_range is greater than the shortest image edge.
  475. Notes:
  476. - If both unit_size_range and holes_number_xy are None, the function falls back to a default calculation,
  477. where the grid unit size is set to max(2, image_dimension // 10) for both height and width.
  478. - The function prioritizes unit_size_range over holes_number_xy if both are provided.
  479. - When using holes_number_xy, the actual number of holes may be slightly different due to integer division.
  480. Examples:
  481. >>> image_shape = (100, 200)
  482. >>> calculate_grid_dimensions(image_shape, unit_size_range=(10, 20))
  483. (15, 15) # Random value between 10 and 20
  484. >>> calculate_grid_dimensions(image_shape, holes_number_xy=(5, 10))
  485. (20, 20) # 100 // 5 and 200 // 10
  486. >>> calculate_grid_dimensions(image_shape)
  487. (10, 20) # Default calculation: max(2, dimension // 10)
  488. """
  489. height, width = image_shape[:2]
  490. if unit_size_range is not None:
  491. if unit_size_range[1] > min(image_shape[:2]):
  492. raise ValueError("Grid size limits must be within the shortest image edge.")
  493. unit_size = random_generator.integers(*unit_size_range)
  494. return unit_size, unit_size
  495. if holes_number_xy:
  496. holes_number_x, holes_number_y = holes_number_xy
  497. unit_width = width // holes_number_x
  498. unit_height = height // holes_number_y
  499. return unit_height, unit_width
  500. # Default fallback
  501. unit_width = max(2, width // 10)
  502. unit_height = max(2, height // 10)
  503. return unit_height, unit_width
  504. def generate_grid_holes(
  505. image_shape: tuple[int, int],
  506. grid: tuple[int, int],
  507. ratio: float,
  508. random_offset: bool,
  509. shift_xy: tuple[int, int],
  510. random_generator: np.random.Generator,
  511. ) -> np.ndarray:
  512. """Generate a list of holes for GridDropout using a uniform grid.
  513. This function creates a grid of holes for use in the GridDropout augmentation technique.
  514. It allows for customization of the grid size, hole size ratio, and positioning of holes.
  515. Args:
  516. image_shape (tuple[int, int]): The shape of the image as (height, width).
  517. grid (tuple[int, int]): The grid size as (rows, columns). This determines the number of cells
  518. in the grid, where each cell may contain a hole.
  519. ratio (float): The ratio of the hole size to the grid cell size. Should be between 0 and 1.
  520. A ratio of 1 means the hole will fill the entire grid cell.
  521. random_offset (bool): If True, applies random offsets to each hole within its grid cell.
  522. If False, uses the global shift specified by shift_xy.
  523. shift_xy (tuple[int, int]): The global shift to apply to all holes as (shift_x, shift_y).
  524. Only used when random_offset is False.
  525. random_generator (np.random.Generator): The random generator for generating random offsets
  526. and shuffling. If None, a new Generator will be created.
  527. Returns:
  528. np.ndarray: An array of hole coordinates, where each hole is represented as
  529. [x1, y1, x2, y2]. The shape of the array is (n_holes, 4), where n_holes
  530. is determined by the grid size.
  531. Notes:
  532. - The function first creates a uniform grid based on the image shape and specified grid size.
  533. - Hole sizes are calculated based on the provided ratio and grid cell sizes.
  534. - If random_offset is True, each hole is randomly positioned within its grid cell.
  535. - If random_offset is False, all holes are shifted by the global shift_xy value.
  536. - The function ensures that all holes remain within the image boundaries.
  537. Examples:
  538. >>> image_shape = (100, 100)
  539. >>> grid = (5, 5)
  540. >>> ratio = 0.5
  541. >>> random_offset = True
  542. >>> random_state = np.random.RandomState(42)
  543. >>> shift_xy = (0, 0)
  544. >>> holes = generate_grid_holes(image_shape, grid, ratio, random_offset, random_state, shift_xy)
  545. >>> print(holes.shape)
  546. (25, 4)
  547. >>> print(holes[0]) # Example output: [x1, y1, x2, y2] of the first hole
  548. [ 1 21 11 31]
  549. """
  550. height, width = image_shape[:2]
  551. # Generate the uniform grid
  552. cells = split_uniform_grid(image_shape, grid, random_generator)
  553. # Calculate hole sizes based on the ratio
  554. cell_heights = cells[:, 2] - cells[:, 0]
  555. cell_widths = cells[:, 3] - cells[:, 1]
  556. hole_heights = np.clip(cell_heights * ratio, 1, cell_heights - 1).astype(int)
  557. hole_widths = np.clip(cell_widths * ratio, 1, cell_widths - 1).astype(int)
  558. # Calculate maximum possible offsets
  559. max_offset_y = cell_heights - hole_heights
  560. max_offset_x = cell_widths - hole_widths
  561. if random_offset:
  562. # Generate random offsets for each hole
  563. offset_y = random_generator.integers(0, max_offset_y + 1)
  564. offset_x = random_generator.integers(0, max_offset_x + 1)
  565. else:
  566. # Use global shift
  567. offset_y = np.full_like(max_offset_y, shift_xy[1])
  568. offset_x = np.full_like(max_offset_x, shift_xy[0])
  569. # Calculate hole coordinates
  570. x_min = np.clip(cells[:, 1] + offset_x, 0, width - hole_widths)
  571. y_min = np.clip(cells[:, 0] + offset_y, 0, height - hole_heights)
  572. x_max = np.minimum(x_min + hole_widths, width)
  573. y_max = np.minimum(y_min + hole_heights, height)
  574. return np.column_stack((x_min, y_min, x_max, y_max))
  575. @handle_empty_array("bboxes")
  576. def mask_dropout_bboxes(
  577. bboxes: np.ndarray,
  578. dropout_mask: np.ndarray,
  579. image_shape: tuple[int, int],
  580. min_area: float,
  581. min_visibility: float,
  582. ) -> np.ndarray:
  583. """Filter and resize bounding boxes based on dropout mask.
  584. Args:
  585. bboxes (np.ndarray): Array of bounding boxes with shape (num_boxes, 4+)
  586. dropout_mask (np.ndarray): Binary mask indicating dropped areas
  587. image_shape (tuple[int, int]): Shape of the image (height, width)
  588. min_area (float): Minimum area of a bounding box to keep
  589. min_visibility (float): Minimum visibility ratio of a bounding box to keep
  590. Returns:
  591. np.ndarray: Filtered and resized bounding boxes
  592. """
  593. height, width = image_shape
  594. # Ensure dropout_mask is 2D
  595. if dropout_mask.ndim > 2:
  596. if dropout_mask.shape[0] == 1: # Shape is (1, H, W)
  597. dropout_mask = dropout_mask.squeeze(0)
  598. elif dropout_mask.shape[-1] <= 4: # Shape is (H, W, C)
  599. dropout_mask = np.any(dropout_mask, axis=-1)
  600. else: # Shape is (C, H, W)
  601. dropout_mask = np.any(dropout_mask, axis=0)
  602. # Create binary masks for each bounding box
  603. y, x = np.ogrid[:height, :width]
  604. box_masks = (
  605. (x[None, :] >= bboxes[:, 0, None, None])
  606. & (x[None, :] <= bboxes[:, 2, None, None])
  607. & (y[None, :] >= bboxes[:, 1, None, None])
  608. & (y[None, :] <= bboxes[:, 3, None, None])
  609. )
  610. # Calculate the area of each bounding box
  611. box_areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
  612. # Calculate the visible area of each box (non-intersecting area with dropout mask)
  613. visible_areas = np.sum(box_masks & ~dropout_mask, axis=(1, 2))
  614. # Calculate visibility ratio (visible area / total box area)
  615. visibility_ratio = visible_areas / box_areas
  616. # Create a boolean mask for boxes to keep
  617. keep_mask = (visible_areas >= min_area) & (visibility_ratio >= min_visibility)
  618. return bboxes[keep_mask]
  619. @handle_empty_array("keypoints")
  620. def mask_dropout_keypoints(
  621. keypoints: np.ndarray,
  622. dropout_mask: np.ndarray,
  623. ) -> np.ndarray:
  624. """Filter keypoints based on dropout mask.
  625. Args:
  626. keypoints (np.ndarray): Array of keypoints with shape (num_keypoints, 2+)
  627. dropout_mask (np.ndarray): Binary mask indicating dropped areas
  628. Returns:
  629. np.ndarray: Filtered keypoints
  630. """
  631. # Ensure dropout_mask is 2D
  632. if dropout_mask.ndim > 2:
  633. if dropout_mask.shape[0] == 1: # Shape is (1, H, W)
  634. dropout_mask = dropout_mask.squeeze(0)
  635. elif dropout_mask.shape[-1] <= 4: # Shape is (H, W, C)
  636. dropout_mask = np.any(dropout_mask, axis=-1)
  637. else: # Shape is (C, H, W)
  638. dropout_mask = np.any(dropout_mask, axis=0)
  639. # Get coordinates as integers
  640. coords = keypoints[:, :2].astype(int)
  641. # Filter out keypoints that are outside the mask dimensions
  642. valid_mask = (
  643. (coords[:, 0] >= 0)
  644. & (coords[:, 0] < dropout_mask.shape[1])
  645. & (coords[:, 1] >= 0)
  646. & (coords[:, 1] < dropout_mask.shape[0])
  647. )
  648. # For valid keypoints, check if they fall on non-dropped pixels
  649. if np.any(valid_mask):
  650. valid_coords = coords[valid_mask]
  651. valid_mask[valid_mask] = ~dropout_mask[valid_coords[:, 1], valid_coords[:, 0]]
  652. return keypoints[valid_mask]
  653. def label(mask: np.ndarray, return_num: bool = False, connectivity: int = 2) -> np.ndarray | tuple[np.ndarray, int]:
  654. """Label connected regions of an integer array.
  655. This function uses OpenCV's connectedComponents under the hood but mimics
  656. the behavior of scikit-image's label function.
  657. Args:
  658. mask (np.ndarray): The array to label. Must be of integer type.
  659. return_num (bool): If True, return the number of labels (default: False).
  660. connectivity (int): Maximum number of orthogonal hops to consider a pixel/voxel
  661. as a neighbor. Accepted values are 1 or 2. Default is 2.
  662. Returns:
  663. np.ndarray | tuple[np.ndarray, int]: Labeled array, where all connected regions are
  664. assigned the same integer value. If return_num is True, it also returns the number of labels.
  665. """
  666. # Create a copy of the original mask
  667. labeled = np.zeros_like(mask, dtype=np.int32)
  668. # Get unique non-zero values from the original mask
  669. unique_values = np.unique(mask[mask != 0])
  670. # Label each unique value separately
  671. next_label = 1
  672. for value in unique_values:
  673. binary_mask = (mask == value).astype(np.uint8)
  674. # Set connectivity for OpenCV (4 or 8)
  675. cv2_connectivity = 4 if connectivity == 1 else 8
  676. # Use OpenCV's connectedComponents
  677. num_labels, labels = cv2.connectedComponents(binary_mask, connectivity=cv2_connectivity)
  678. # Assign new labels
  679. for i in range(1, num_labels):
  680. labeled[labels == i] = next_label
  681. next_label += 1
  682. num_labels = next_label - 1
  683. return (labeled, num_labels) if return_num else labeled
  684. def get_holes_from_boxes(
  685. target_boxes: np.ndarray,
  686. num_holes_per_box: int,
  687. hole_height_range: tuple[float, float],
  688. hole_width_range: tuple[float, float],
  689. random_generator: np.random.Generator,
  690. ) -> np.ndarray:
  691. """Generate holes based on bounding boxes."""
  692. num_boxes = len(target_boxes)
  693. # Get box dimensions (N, )
  694. box_widths = target_boxes[:, 2] - target_boxes[:, 0]
  695. box_heights = target_boxes[:, 3] - target_boxes[:, 1]
  696. # Sample hole dimensions (N, num_holes)
  697. hole_heights = (
  698. random_generator.uniform(
  699. hole_height_range[0],
  700. hole_height_range[1],
  701. size=(num_boxes, num_holes_per_box),
  702. )
  703. * box_heights[:, None]
  704. ).astype(np.int32)
  705. hole_widths = (
  706. random_generator.uniform(
  707. hole_width_range[0],
  708. hole_width_range[1],
  709. size=(num_boxes, num_holes_per_box),
  710. )
  711. * box_widths[:, None]
  712. ).astype(np.int32)
  713. # Sample positions (N, num_holes)
  714. x_offsets = random_generator.uniform(0, 1, size=(num_boxes, num_holes_per_box)) * (
  715. box_widths[:, None] - hole_widths
  716. )
  717. y_offsets = random_generator.uniform(0, 1, size=(num_boxes, num_holes_per_box)) * (
  718. box_heights[:, None] - hole_heights
  719. )
  720. # Calculate final coordinates (N, num_holes)
  721. x_min = target_boxes[:, 0, None] + x_offsets
  722. y_min = target_boxes[:, 1, None] + y_offsets
  723. x_max = x_min + hole_widths
  724. y_max = y_min + hole_heights
  725. return np.stack([x_min, y_min, x_max, y_max], axis=-1).astype(np.int32).reshape(-1, 4)
  726. def sample_points_from_components(
  727. mask: np.ndarray,
  728. num_points: int,
  729. random_generator: np.random.Generator,
  730. ) -> tuple[np.ndarray, np.ndarray] | None:
  731. """Sample points from connected components in a mask.
  732. Args:
  733. mask (np.ndarray): Binary mask
  734. num_points (int): Number of points to sample
  735. random_generator (np.random.Generator): Random number generator
  736. Returns:
  737. tuple[np.ndarray, np.ndarray] | None: Tuple of (x_coordinates, y_coordinates) or None if no valid components
  738. """
  739. num_labels, labels = cv2.connectedComponents(mask.astype(np.uint8))
  740. if num_labels == 1: # Only background
  741. return None
  742. centers = []
  743. obj_sizes = []
  744. for label in range(1, num_labels): # Skip background (0)
  745. points = np.argwhere(labels == label) # Returns (y, x) coordinates
  746. if len(points) == 0:
  747. continue
  748. # Calculate object size once per component
  749. obj_size = np.sqrt(len(points))
  750. # Randomly sample points from the component, allowing repeats
  751. indices = random_generator.choice(len(points), size=num_points, replace=True)
  752. sampled_points = points[indices]
  753. # Convert from (y, x) to (x, y)
  754. centers.extend(sampled_points[:, ::-1])
  755. # Add corresponding object size for each point
  756. obj_sizes.extend([obj_size] * num_points)
  757. return (np.array(centers), np.array(obj_sizes)) if centers else None
  758. def get_holes_from_mask(
  759. mask: np.ndarray,
  760. num_holes_per_obj: int,
  761. mask_indices: list[int],
  762. hole_height_range: tuple[float, float],
  763. hole_width_range: tuple[float, float],
  764. random_generator: np.random.Generator,
  765. ) -> np.ndarray:
  766. """Generate holes based on segmentation mask."""
  767. # Create binary mask for target indices
  768. binary_mask = np.isin(mask, np.array(mask_indices))
  769. if not np.any(binary_mask): # If no target objects found
  770. return np.array([], dtype=np.int32).reshape((0, 4))
  771. result = sample_points_from_components(binary_mask, num_holes_per_obj, random_generator)
  772. if result is None:
  773. return np.array([], dtype=np.int32).reshape((0, 4))
  774. centers, obj_sizes = result
  775. num_centers = len(centers)
  776. height, width = mask.shape[:2]
  777. # Sample hole dimensions (N,) using per-component object sizes
  778. hole_heights = (
  779. random_generator.uniform(
  780. hole_height_range[0],
  781. hole_height_range[1],
  782. size=num_centers,
  783. )
  784. * obj_sizes
  785. )
  786. hole_widths = (
  787. random_generator.uniform(
  788. hole_width_range[0],
  789. hole_width_range[1],
  790. size=num_centers,
  791. )
  792. * obj_sizes
  793. )
  794. # Calculate hole coordinates around centers
  795. half_heights = hole_heights // 2
  796. half_widths = hole_widths // 2
  797. holes = np.column_stack(
  798. [
  799. centers[:, 0] - half_widths, # x_min
  800. centers[:, 1] - half_heights, # y_min
  801. centers[:, 0] + half_widths, # x_max
  802. centers[:, 1] + half_heights, # y_max
  803. ],
  804. ).astype(np.int32)
  805. # Clip holes to image boundaries
  806. holes[:, 0] = np.clip(holes[:, 0], 0, width - 1) # x_min
  807. holes[:, 1] = np.clip(holes[:, 1], 0, height - 1) # y_min
  808. holes[:, 2] = np.clip(holes[:, 2], 0, width) # x_max
  809. holes[:, 3] = np.clip(holes[:, 3], 0, height) # y_max
  810. # Filter out holes that became too small after clipping
  811. valid_holes = (holes[:, 2] - holes[:, 0] > 0) & (holes[:, 3] - holes[:, 1] > 0)
  812. return holes[valid_holes]