transforms.py 145 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647
  1. """Transform classes for cropping operations on images and other data types.
  2. This module provides various crop transforms that can be applied to images, masks,
  3. bounding boxes, and keypoints. The transforms include simple cropping, random cropping,
  4. center cropping, cropping near bounding boxes, and other specialized cropping operations
  5. that maintain the integrity of bounding boxes. These transforms are designed to work within
  6. the albumentations pipeline and can be used for data augmentation in computer vision tasks.
  7. """
  8. from __future__ import annotations
  9. import math
  10. from collections.abc import Sequence
  11. from typing import Annotated, Any, Literal, Union, cast
  12. import cv2
  13. import numpy as np
  14. from pydantic import AfterValidator, Field, model_validator
  15. from typing_extensions import Self
  16. from albumentations.augmentations.geometric import functional as fgeometric
  17. from albumentations.core.bbox_utils import denormalize_bboxes, normalize_bboxes, union_of_bboxes
  18. from albumentations.core.pydantic import (
  19. OnePlusIntRangeType,
  20. ZeroOneRangeType,
  21. check_range_bounds,
  22. nondecreasing,
  23. )
  24. from albumentations.core.transforms_interface import BaseTransformInitSchema, DualTransform
  25. from albumentations.core.type_definitions import (
  26. ALL_TARGETS,
  27. NUM_MULTI_CHANNEL_DIMENSIONS,
  28. PAIR,
  29. PercentType,
  30. PxType,
  31. )
  32. from . import functional as fcrops
# Public names exported by this module (what `from ... import *` exposes).
# The base classes defined in this file are not part of this list.
__all__ = [
    "AtLeastOneBBoxRandomCrop",
    "BBoxSafeRandomCrop",
    "CenterCrop",
    "Crop",
    "CropAndPad",
    "CropNonEmptyMaskIfExists",
    "RandomCrop",
    "RandomCropFromBorders",
    "RandomCropNearBBox",
    "RandomResizedCrop",
    "RandomSizedBBoxSafeCrop",
    "RandomSizedCrop",
]
  47. class CropSizeError(Exception):
  48. pass
  49. class BaseCrop(DualTransform):
  50. """Base class for transforms that only perform cropping.
  51. This abstract class provides the foundation for all cropping transformations.
  52. It handles cropping of different data types including images, masks, bounding boxes,
  53. keypoints, and volumes while keeping their spatial relationships intact.
  54. Child classes must implement the `get_params_dependent_on_data` method to determine
  55. crop coordinates based on transform-specific logic. This method should return a dictionary
  56. containing at least a 'crop_coords' key with a tuple value (x_min, y_min, x_max, y_max).
  57. Args:
  58. p (float): Probability of applying the transform. Default: 1.0.
  59. Targets:
  60. image, mask, bboxes, keypoints, volume, mask3d
  61. Image types:
  62. uint8, float32
  63. Note:
  64. This class is not meant to be used directly. Instead, use or create derived
  65. transforms that implement the specific cropping behavior required.
  66. Examples:
  67. >>> import numpy as np
  68. >>> import albumentations as A
  69. >>> from albumentations.augmentations.crops.transforms import BaseCrop
  70. >>>
  71. >>> # Example of a custom crop transform that inherits from BaseCrop
  72. >>> class CustomCenterCrop(BaseCrop):
  73. ... '''A simple custom center crop with configurable size'''
  74. ... def __init__(self, crop_height, crop_width, p=1.0):
  75. ... super().__init__(p=p)
  76. ... self.crop_height = crop_height
  77. ... self.crop_width = crop_width
  78. ...
  79. ... def get_params_dependent_on_data(self, params, data):
  80. ... '''Calculate crop coordinates based on center of image'''
  81. ... image_height, image_width = params["shape"][:2]
  82. ...
  83. ... # Calculate center crop coordinates
  84. ... x_min = max(0, (image_width - self.crop_width) // 2)
  85. ... y_min = max(0, (image_height - self.crop_height) // 2)
  86. ... x_max = min(image_width, x_min + self.crop_width)
  87. ... y_max = min(image_height, y_min + self.crop_height)
  88. ...
  89. ... return {"crop_coords": (x_min, y_min, x_max, y_max)}
  90. >>>
  91. >>> # Prepare sample data
  92. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  93. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  94. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  95. >>> bbox_labels = [1, 2]
  96. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  97. >>> keypoint_labels = [0, 1]
  98. >>>
  99. >>> # Use the custom transform in a pipeline
  100. >>> transform = A.Compose(
  101. ... [CustomCenterCrop(crop_height=80, crop_width=80)],
  102. ... bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  103. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels'])
  104. ... )
  105. >>>
  106. >>> # Apply the transform to data
  107. >>> result = transform(
  108. ... image=image,
  109. ... mask=mask,
  110. ... bboxes=bboxes,
  111. ... bbox_labels=bbox_labels,
  112. ... keypoints=keypoints,
  113. ... keypoint_labels=keypoint_labels
  114. ... )
  115. >>>
  116. >>> # Get the transformed data
  117. >>> transformed_image = result['image'] # Will be 80x80
  118. >>> transformed_mask = result['mask'] # Will be 80x80
  119. >>> transformed_bboxes = result['bboxes'] # Bounding boxes adjusted to the cropped area
  120. >>> transformed_bbox_labels = result['bbox_labels'] # Labels for bboxes that remain after cropping
  121. >>> transformed_keypoints = result['keypoints'] # Keypoints adjusted to the cropped area
  122. >>> transformed_keypoint_labels = result['keypoint_labels'] # Labels for keypoints that remain after cropping
  123. """
  124. _targets = ALL_TARGETS
  125. def apply(
  126. self,
  127. img: np.ndarray,
  128. crop_coords: tuple[int, int, int, int],
  129. **params: Any,
  130. ) -> np.ndarray:
  131. """Apply the crop transform to an image.
  132. Args:
  133. img (np.ndarray): The image to apply the crop transform to.
  134. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  135. params (dict[str, Any]): Additional parameters for the transform.
  136. Returns:
  137. np.ndarray: The cropped image.
  138. """
  139. return fcrops.crop(img, x_min=crop_coords[0], y_min=crop_coords[1], x_max=crop_coords[2], y_max=crop_coords[3])
  140. def apply_to_bboxes(
  141. self,
  142. bboxes: np.ndarray,
  143. crop_coords: tuple[int, int, int, int],
  144. **params: Any,
  145. ) -> np.ndarray:
  146. """Apply the crop transform to bounding boxes.
  147. Args:
  148. bboxes (np.ndarray): The bounding boxes to apply the crop transform to.
  149. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  150. params (dict[str, Any]): Additional parameters for the transform.
  151. Returns:
  152. np.ndarray: The cropped bounding boxes.
  153. """
  154. return fcrops.crop_bboxes_by_coords(bboxes, crop_coords, params["shape"][:2])
  155. def apply_to_keypoints(
  156. self,
  157. keypoints: np.ndarray,
  158. crop_coords: tuple[int, int, int, int],
  159. **params: Any,
  160. ) -> np.ndarray:
  161. """Apply the crop transform to keypoints.
  162. Args:
  163. keypoints (np.ndarray): The keypoints to apply the crop transform to.
  164. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  165. params (dict[str, Any]): Additional parameters for the transform.
  166. Returns:
  167. np.ndarray: The cropped keypoints.
  168. """
  169. return fcrops.crop_keypoints_by_coords(keypoints, crop_coords)
  170. def apply_to_images(
  171. self,
  172. images: np.ndarray,
  173. crop_coords: tuple[int, int, int, int],
  174. **params: Any,
  175. ) -> np.ndarray:
  176. return fcrops.volume_crop_yx(images, crop_coords[0], crop_coords[1], crop_coords[2], crop_coords[3])
  177. def apply_to_volume(
  178. self,
  179. volume: np.ndarray,
  180. crop_coords: tuple[int, int, int, int],
  181. **params: Any,
  182. ) -> np.ndarray:
  183. return self.apply_to_images(volume, crop_coords, **params)
  184. def apply_to_volumes(
  185. self,
  186. volumes: np.ndarray,
  187. crop_coords: tuple[int, int, int, int],
  188. **params: Any,
  189. ) -> np.ndarray:
  190. return fcrops.volumes_crop_yx(volumes, crop_coords[0], crop_coords[1], crop_coords[2], crop_coords[3])
  191. def apply_to_mask3d(
  192. self,
  193. mask3d: np.ndarray,
  194. crop_coords: tuple[int, int, int, int],
  195. **params: Any,
  196. ) -> np.ndarray:
  197. return self.apply_to_images(mask3d, crop_coords, **params)
  198. def apply_to_masks3d(
  199. self,
  200. masks3d: np.ndarray,
  201. crop_coords: tuple[int, int, int, int],
  202. **params: Any,
  203. ) -> np.ndarray:
  204. return self.apply_to_volumes(masks3d, crop_coords, **params)
  205. @staticmethod
  206. def _clip_bbox(bbox: tuple[int, int, int, int], image_shape: tuple[int, int]) -> tuple[int, int, int, int]:
  207. height, width = image_shape[:2]
  208. x_min, y_min, x_max, y_max = bbox
  209. x_min = np.clip(x_min, 0, width)
  210. y_min = np.clip(y_min, 0, height)
  211. x_max = np.clip(x_max, x_min, width)
  212. y_max = np.clip(y_max, y_min, height)
  213. return x_min, y_min, x_max, y_max
  214. class BaseCropAndPad(BaseCrop):
  215. """Base class for transforms that need both cropping and padding.
  216. This abstract class extends BaseCrop by adding padding capabilities. It's the foundation
  217. for transforms that may need to both crop parts of the input and add padding, such as when
  218. converting inputs to a specific target size. The class handles the complexities of applying
  219. these operations to different data types (images, masks, bounding boxes, keypoints) while
  220. maintaining their spatial relationships.
  221. Child classes must implement the `get_params_dependent_on_data` method to determine
  222. crop coordinates and padding parameters based on transform-specific logic.
  223. Args:
  224. pad_if_needed (bool): Whether to pad the input if the crop size exceeds input dimensions.
  225. border_mode (int): OpenCV border mode used for padding.
  226. fill (tuple[float, ...] | float): Value to fill the padded area if border_mode is BORDER_CONSTANT.
  227. For multi-channel images, this can be a tuple with a value for each channel.
  228. fill_mask (tuple[float, ...] | float): Value to fill the padded area in masks.
  229. pad_position (Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]):
  230. Position of padding when pad_if_needed is True.
  231. p (float): Probability of applying the transform. Default: 1.0.
  232. Targets:
  233. image, mask, bboxes, keypoints, volume, mask3d
  234. Image types:
  235. uint8, float32
  236. Note:
  237. This class is not meant to be used directly. Instead, use or create derived
  238. transforms that implement the specific cropping and padding behavior required.
  239. Examples:
  240. >>> import numpy as np
  241. >>> import cv2
  242. >>> import albumentations as A
  243. >>> from albumentations.augmentations.crops.transforms import BaseCropAndPad
  244. >>>
  245. >>> # Example of a custom transform that inherits from BaseCropAndPad
  246. >>> # This transform crops to a fixed size, padding if needed to maintain dimensions
  247. >>> class CustomFixedSizeCrop(BaseCropAndPad):
  248. ... '''A custom fixed-size crop that pads if needed to maintain output size'''
  249. ... def __init__(
  250. ... self,
  251. ... height=224,
  252. ... width=224,
  253. ... offset_x=0, # Offset for crop position
  254. ... offset_y=0, # Offset for crop position
  255. ... pad_if_needed=True,
  256. ... border_mode=cv2.BORDER_CONSTANT,
  257. ... fill=0,
  258. ... fill_mask=0,
  259. ... pad_position="center",
  260. ... p=1.0,
  261. ... ):
  262. ... super().__init__(
  263. ... pad_if_needed=pad_if_needed,
  264. ... border_mode=border_mode,
  265. ... fill=fill,
  266. ... fill_mask=fill_mask,
  267. ... pad_position=pad_position,
  268. ... p=p,
  269. ... )
  270. ... self.height = height
  271. ... self.width = width
  272. ... self.offset_x = offset_x
  273. ... self.offset_y = offset_y
  274. ...
  275. ... def get_params_dependent_on_data(self, params, data):
  276. ... '''Calculate crop coordinates and padding if needed'''
  277. ... image_shape = params["shape"][:2]
  278. ... image_height, image_width = image_shape
  279. ...
  280. ... # Calculate crop coordinates with offsets
  281. ... x_min = self.offset_x
  282. ... y_min = self.offset_y
  283. ... x_max = min(x_min + self.width, image_width)
  284. ... y_max = min(y_min + self.height, image_height)
  285. ...
  286. ... # Get padding params if needed
  287. ... pad_params = self._get_pad_params(
  288. ... image_shape,
  289. ... (self.height, self.width)
  290. ... ) if self.pad_if_needed else None
  291. ...
  292. ... return {
  293. ... "crop_coords": (x_min, y_min, x_max, y_max),
  294. ... "pad_params": pad_params,
  295. ... }
  296. >>>
  297. >>> # Prepare sample data
  298. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  299. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  300. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  301. >>> bbox_labels = [1, 2]
  302. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  303. >>> keypoint_labels = [0, 1]
  304. >>>
  305. >>> # Use the custom transform in a pipeline
  306. >>> # This will create a 224x224 crop with padding as needed
  307. >>> transform = A.Compose(
  308. ... [CustomFixedSizeCrop(
  309. ... height=224,
  310. ... width=224,
  311. ... offset_x=20,
  312. ... offset_y=10,
  313. ... fill=127, # Gray color for padding
  314. ... fill_mask=0
  315. ... )],
  316. ... bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  317. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  318. >>>
  319. >>> # Apply the transform to data
  320. >>> result = transform(
  321. ... image=image,
  322. ... mask=mask,
  323. ... bboxes=bboxes,
  324. ... bbox_labels=bbox_labels,
  325. ... keypoints=keypoints,
  326. ... keypoint_labels=keypoint_labels
  327. ... )
  328. >>>
  329. >>> # Get the transformed data
  330. >>> transformed_image = result['image'] # Will be 224x224 with padding
  331. >>> transformed_mask = result['mask'] # Will be 224x224 with padding
  332. >>> transformed_bboxes = result['bboxes'] # Bounding boxes adjusted to the cropped and padded area
  333. >>> transformed_bbox_labels = result['bbox_labels'] # Bounding box labels after crop
  334. >>> transformed_keypoints = result['keypoints'] # Keypoints adjusted to the cropped and padded area
  335. >>> transformed_keypoint_labels = result['keypoint_labels'] # Keypoint labels after crop
  336. """
    class InitSchema(BaseTransformInitSchema):
        # Declares the types of the constructor arguments of BaseCropAndPad.

        # Whether padding parameters are computed at all.
        pad_if_needed: bool
        # Only these five OpenCV border modes are accepted.
        border_mode: Literal[
            cv2.BORDER_CONSTANT,
            cv2.BORDER_REPLICATE,
            cv2.BORDER_REFLECT,
            cv2.BORDER_WRAP,
            cv2.BORDER_REFLECT_101,
        ]
        # Padding value for images: a scalar or one value per channel.
        fill: tuple[float, ...] | float
        # Padding value for masks.
        fill_mask: tuple[float, ...] | float
        # Where the padding is placed relative to the original content.
        pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"]
  349. def __init__(
  350. self,
  351. pad_if_needed: bool,
  352. border_mode: Literal[
  353. cv2.BORDER_CONSTANT,
  354. cv2.BORDER_REPLICATE,
  355. cv2.BORDER_REFLECT,
  356. cv2.BORDER_WRAP,
  357. cv2.BORDER_REFLECT_101,
  358. ],
  359. fill: tuple[float, ...] | float,
  360. fill_mask: tuple[float, ...] | float,
  361. pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"],
  362. p: float,
  363. ):
  364. super().__init__(p=p)
  365. self.pad_if_needed = pad_if_needed
  366. self.border_mode = border_mode
  367. self.fill = fill
  368. self.fill_mask = fill_mask
  369. self.pad_position = pad_position
  370. def _get_pad_params(self, image_shape: tuple[int, int], target_shape: tuple[int, int]) -> dict[str, Any] | None:
  371. """Calculate padding parameters if needed."""
  372. if not self.pad_if_needed:
  373. return None
  374. h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.get_padding_params(
  375. image_shape=image_shape,
  376. min_height=target_shape[0],
  377. min_width=target_shape[1],
  378. pad_height_divisor=None,
  379. pad_width_divisor=None,
  380. )
  381. if h_pad_top == h_pad_bottom == w_pad_left == w_pad_right == 0:
  382. return None
  383. h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = fgeometric.adjust_padding_by_position(
  384. h_top=h_pad_top,
  385. h_bottom=h_pad_bottom,
  386. w_left=w_pad_left,
  387. w_right=w_pad_right,
  388. position=self.pad_position,
  389. py_random=self.py_random,
  390. )
  391. return {
  392. "pad_top": h_pad_top,
  393. "pad_bottom": h_pad_bottom,
  394. "pad_left": w_pad_left,
  395. "pad_right": w_pad_right,
  396. }
  397. def apply(
  398. self,
  399. img: np.ndarray,
  400. crop_coords: tuple[int, int, int, int],
  401. **params: Any,
  402. ) -> np.ndarray:
  403. """Apply the crop and pad transform to an image.
  404. Args:
  405. img (np.ndarray): The image to apply the crop and pad transform to.
  406. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  407. params (dict[str, Any]): Additional parameters for the transform.
  408. Returns:
  409. np.ndarray: The cropped and padded image.
  410. """
  411. pad_params = params.get("pad_params")
  412. if pad_params is not None:
  413. img = fgeometric.pad_with_params(
  414. img,
  415. pad_params["pad_top"],
  416. pad_params["pad_bottom"],
  417. pad_params["pad_left"],
  418. pad_params["pad_right"],
  419. border_mode=self.border_mode,
  420. value=self.fill,
  421. )
  422. return BaseCrop.apply(self, img, crop_coords, **params)
  423. def apply_to_mask(
  424. self,
  425. mask: np.ndarray,
  426. crop_coords: Any,
  427. **params: Any,
  428. ) -> np.ndarray:
  429. """Apply the crop and pad transform to a mask.
  430. Args:
  431. mask (np.ndarray): The mask to apply the crop and pad transform to.
  432. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  433. params (dict[str, Any]): Additional parameters for the transform.
  434. Returns:
  435. np.ndarray: The cropped and padded mask.
  436. """
  437. pad_params = params.get("pad_params")
  438. if pad_params is not None:
  439. mask = fgeometric.pad_with_params(
  440. mask,
  441. pad_params["pad_top"],
  442. pad_params["pad_bottom"],
  443. pad_params["pad_left"],
  444. pad_params["pad_right"],
  445. border_mode=self.border_mode,
  446. value=self.fill_mask,
  447. )
  448. # Note' that super().apply would apply the padding twice as it is looped to this.apply
  449. return BaseCrop.apply(self, mask, crop_coords=crop_coords, **params)
  450. def apply_to_images(
  451. self,
  452. images: np.ndarray,
  453. crop_coords: tuple[int, int, int, int],
  454. **params: Any,
  455. ) -> np.ndarray:
  456. pad_params = params.get("pad_params")
  457. if pad_params is not None:
  458. images = fcrops.pad_along_axes(
  459. images,
  460. pad_params["pad_top"],
  461. pad_params["pad_bottom"],
  462. pad_params["pad_left"],
  463. pad_params["pad_right"],
  464. h_axis=2,
  465. w_axis=3,
  466. border_mode=self.border_mode,
  467. pad_value=self.fill,
  468. )
  469. return BaseCrop.apply_to_images(self, images, crop_coords, **params)
  470. def apply_to_volume(
  471. self,
  472. volume: np.ndarray,
  473. crop_coords: tuple[int, int, int, int],
  474. **params: Any,
  475. ) -> np.ndarray:
  476. return self.apply_to_images(volume, crop_coords, **params)
  477. def apply_to_volumes(
  478. self,
  479. volumes: np.ndarray,
  480. crop_coords: tuple[int, int, int, int],
  481. **params: Any,
  482. ) -> np.ndarray:
  483. pad_params = params.get("pad_params")
  484. if pad_params is not None:
  485. volumes = fcrops.pad_along_axes(
  486. volumes,
  487. pad_params["pad_top"],
  488. pad_params["pad_bottom"],
  489. pad_params["pad_left"],
  490. pad_params["pad_right"],
  491. h_axis=3,
  492. w_axis=4,
  493. border_mode=self.border_mode,
  494. pad_value=self.fill,
  495. )
  496. return BaseCrop.apply_to_volumes(self, volumes, crop_coords, **params)
  497. def apply_to_mask3d(
  498. self,
  499. mask3d: np.ndarray,
  500. crop_coords: tuple[int, int, int, int],
  501. **params: Any,
  502. ) -> np.ndarray:
  503. return self.apply_to_images(mask3d, crop_coords, **params)
  504. def apply_to_masks3d(
  505. self,
  506. masks3d: np.ndarray,
  507. crop_coords: tuple[int, int, int, int],
  508. **params: Any,
  509. ) -> np.ndarray:
  510. return self.apply_to_volumes(masks3d, crop_coords, **params)
  511. def apply_to_bboxes(
  512. self,
  513. bboxes: np.ndarray,
  514. crop_coords: tuple[int, int, int, int],
  515. **params: Any,
  516. ) -> np.ndarray:
  517. """Apply the crop and pad transform to bounding boxes.
  518. Args:
  519. bboxes (np.ndarray): The bounding boxes to apply the crop and pad transform to.
  520. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  521. params (dict[str, Any]): Additional parameters for the transform.
  522. Returns:
  523. np.ndarray: The cropped and padded bounding boxes.
  524. """
  525. pad_params = params.get("pad_params")
  526. image_shape = params["shape"][:2]
  527. if pad_params is not None:
  528. # First denormalize bboxes to absolute coordinates
  529. bboxes_np = denormalize_bboxes(bboxes, image_shape)
  530. # Apply padding to bboxes (already works with absolute coordinates)
  531. bboxes_np = fgeometric.pad_bboxes(
  532. bboxes_np,
  533. pad_params["pad_top"],
  534. pad_params["pad_bottom"],
  535. pad_params["pad_left"],
  536. pad_params["pad_right"],
  537. self.border_mode,
  538. image_shape=image_shape,
  539. )
  540. # Update shape to padded dimensions
  541. padded_height = image_shape[0] + pad_params["pad_top"] + pad_params["pad_bottom"]
  542. padded_width = image_shape[1] + pad_params["pad_left"] + pad_params["pad_right"]
  543. padded_shape = (padded_height, padded_width)
  544. bboxes_np = normalize_bboxes(bboxes_np, padded_shape)
  545. params["shape"] = padded_shape
  546. return BaseCrop.apply_to_bboxes(self, bboxes_np, crop_coords, **params)
  547. # If no padding, use original function behavior
  548. return BaseCrop.apply_to_bboxes(self, bboxes, crop_coords, **params)
  549. def apply_to_keypoints(
  550. self,
  551. keypoints: np.ndarray,
  552. crop_coords: tuple[int, int, int, int],
  553. **params: Any,
  554. ) -> np.ndarray:
  555. """Apply the crop and pad transform to keypoints.
  556. Args:
  557. keypoints (np.ndarray): The keypoints to apply the crop and pad transform to.
  558. crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
  559. params (dict[str, Any]): Additional parameters for the transform.
  560. Returns:
  561. np.ndarray: The cropped and padded keypoints.
  562. """
  563. pad_params = params.get("pad_params")
  564. image_shape = params["shape"][:2]
  565. if pad_params is not None:
  566. # Calculate padded dimensions
  567. padded_height = image_shape[0] + pad_params["pad_top"] + pad_params["pad_bottom"]
  568. padded_width = image_shape[1] + pad_params["pad_left"] + pad_params["pad_right"]
  569. # First apply padding to keypoints using original image shape
  570. keypoints = fgeometric.pad_keypoints(
  571. keypoints,
  572. pad_params["pad_top"],
  573. pad_params["pad_bottom"],
  574. pad_params["pad_left"],
  575. pad_params["pad_right"],
  576. self.border_mode,
  577. image_shape=image_shape,
  578. )
  579. # Update image shape for subsequent crop operation
  580. params = {**params, "shape": (padded_height, padded_width)}
  581. return BaseCrop.apply_to_keypoints(self, keypoints, crop_coords, **params)
  582. class RandomCrop(BaseCropAndPad):
  583. """Crop a random part of the input.
  584. Args:
  585. height (int): height of the crop.
  586. width (int): width of the crop.
  587. pad_if_needed (bool): Whether to pad if crop size exceeds image size. Default: False.
  588. border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
  589. fill (tuple[float, ...] | float): Padding value for images if border_mode is
  590. cv2.BORDER_CONSTANT. Default: 0.
  591. fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
  592. cv2.BORDER_CONSTANT. Default: 0.
  593. pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
  594. Position of padding. Default: 'center'.
  595. p (float): Probability of applying the transform. Default: 1.0.
  596. Targets:
  597. image, mask, bboxes, keypoints, volume, mask3d
  598. Image types:
  599. uint8, float32
  600. Note:
  601. If pad_if_needed is True and crop size exceeds image dimensions, the image will be padded
  602. before applying the random crop.
  603. Examples:
  604. >>> import numpy as np
  605. >>> import albumentations as A
  606. >>> import cv2
  607. >>>
  608. >>> # Prepare sample data
  609. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  610. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  611. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  612. >>> bbox_labels = [1, 2]
  613. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  614. >>> keypoint_labels = [0, 1]
  615. >>>
  616. >>> # Example 1: Basic random crop
  617. >>> transform = A.Compose([
  618. ... A.RandomCrop(height=64, width=64),
  619. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  620. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  621. >>>
  622. >>> # Apply the transform
  623. >>> transformed = transform(
  624. ... image=image,
  625. ... mask=mask,
  626. ... bboxes=bboxes,
  627. ... bbox_labels=bbox_labels,
  628. ... keypoints=keypoints,
  629. ... keypoint_labels=keypoint_labels
  630. ... )
  631. >>>
  632. >>> # Get the transformed data
  633. >>> transformed_image = transformed['image'] # Will be 64x64
  634. >>> transformed_mask = transformed['mask'] # Will be 64x64
  635. >>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
  636. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
  637. >>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to the cropped area
  638. >>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for keypoints that remain
  639. >>>
  640. >>> # Example 2: Random crop with padding when needed
  641. >>> # This is useful when you want to crop to a size larger than some images
  642. >>> transform_padded = A.Compose([
  643. ... A.RandomCrop(
  644. ... height=120, # Larger than original image height
  645. ... width=120, # Larger than original image width
  646. ... pad_if_needed=True,
  647. ... border_mode=cv2.BORDER_CONSTANT,
  648. ... fill=0, # Black padding for image
  649. ... fill_mask=0 # Zero padding for mask
  650. ... ),
  651. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  652. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  653. >>>
  654. >>> # Apply the padded transform
  655. >>> padded_transformed = transform_padded(
  656. ... image=image,
  657. ... mask=mask,
  658. ... bboxes=bboxes,
  659. ... bbox_labels=bbox_labels,
  660. ... keypoints=keypoints,
  661. ... keypoint_labels=keypoint_labels
  662. ... )
  663. >>>
  664. >>> # The result will be 120x120 with padding
  665. >>> padded_image = padded_transformed['image']
  666. >>> padded_mask = padded_transformed['mask']
  667. >>> padded_bboxes = padded_transformed['bboxes'] # Coordinates adjusted to the new dimensions
  668. """
  669. class InitSchema(BaseCropAndPad.InitSchema):
  670. height: Annotated[int, Field(ge=1)]
  671. width: Annotated[int, Field(ge=1)]
  672. border_mode: Literal[
  673. cv2.BORDER_CONSTANT,
  674. cv2.BORDER_REPLICATE,
  675. cv2.BORDER_REFLECT,
  676. cv2.BORDER_WRAP,
  677. cv2.BORDER_REFLECT_101,
  678. ]
  679. fill: tuple[float, ...] | float
  680. fill_mask: tuple[float, ...] | float
  681. def __init__(
  682. self,
  683. height: int,
  684. width: int,
  685. pad_if_needed: bool = False,
  686. pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
  687. border_mode: Literal[
  688. cv2.BORDER_CONSTANT,
  689. cv2.BORDER_REPLICATE,
  690. cv2.BORDER_REFLECT,
  691. cv2.BORDER_WRAP,
  692. cv2.BORDER_REFLECT_101,
  693. ] = cv2.BORDER_CONSTANT,
  694. fill: tuple[float, ...] | float = 0.0,
  695. fill_mask: tuple[float, ...] | float = 0.0,
  696. p: float = 1.0,
  697. ):
  698. super().__init__(
  699. pad_if_needed=pad_if_needed,
  700. border_mode=border_mode,
  701. fill=fill,
  702. fill_mask=fill_mask,
  703. pad_position=pad_position,
  704. p=p,
  705. )
  706. self.height = height
  707. self.width = width
  708. def get_params_dependent_on_data(
  709. self,
  710. params: dict[str, Any],
  711. data: dict[str, Any],
  712. ) -> dict[str, Any]: # Changed return type to be more flexible
  713. """Get parameters that depend on input data.
  714. Args:
  715. params (dict[str, Any]): Parameters.
  716. data (dict[str, Any]): Input data.
  717. Returns:
  718. dict[str, Any]: Dictionary with parameters.
  719. """
  720. image_shape = params["shape"][:2]
  721. image_height, image_width = image_shape
  722. if not self.pad_if_needed and (self.height > image_height or self.width > image_width):
  723. raise CropSizeError(
  724. f"Crop size (height, width) exceeds image dimensions (height, width):"
  725. f" {(self.height, self.width)} vs {image_shape[:2]}",
  726. )
  727. # Get padding params first if needed
  728. pad_params = self._get_pad_params(image_shape, (self.height, self.width))
  729. # If padding is needed, adjust the image shape for crop calculation
  730. if pad_params is not None:
  731. pad_top = pad_params["pad_top"]
  732. pad_bottom = pad_params["pad_bottom"]
  733. pad_left = pad_params["pad_left"]
  734. pad_right = pad_params["pad_right"]
  735. padded_height = image_height + pad_top + pad_bottom
  736. padded_width = image_width + pad_left + pad_right
  737. padded_shape = (padded_height, padded_width)
  738. # Get random crop coordinates based on padded dimensions
  739. h_start = self.py_random.random()
  740. w_start = self.py_random.random()
  741. crop_coords = fcrops.get_crop_coords(padded_shape, (self.height, self.width), h_start, w_start)
  742. else:
  743. # Get random crop coordinates based on original dimensions
  744. h_start = self.py_random.random()
  745. w_start = self.py_random.random()
  746. crop_coords = fcrops.get_crop_coords(image_shape, (self.height, self.width), h_start, w_start)
  747. return {
  748. "crop_coords": crop_coords,
  749. "pad_params": pad_params,
  750. }
  751. class CenterCrop(BaseCropAndPad):
  752. """Crop the central part of the input.
  753. This transform crops the center of the input image, mask, bounding boxes, and keypoints to the specified dimensions.
  754. It's useful when you want to focus on the central region of the input, discarding peripheral information.
  755. Args:
  756. height (int): The height of the crop. Must be greater than 0.
  757. width (int): The width of the crop. Must be greater than 0.
  758. pad_if_needed (bool): Whether to pad if crop size exceeds image size. Default: False.
  759. border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
  760. fill (tuple[float, ...] | float): Padding value for images if border_mode is
  761. cv2.BORDER_CONSTANT. Default: 0.
  762. fill_mask (tuple[float, ...] | float): Padding value for masks if border_mode is
  763. cv2.BORDER_CONSTANT. Default: 0.
  764. pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
  765. Position of padding. Default: 'center'.
  766. p (float): Probability of applying the transform. Default: 1.0.
  767. Targets:
  768. image, mask, bboxes, keypoints, volume, mask3d
  769. Image types:
  770. uint8, float32
  771. Note:
  772. - If pad_if_needed is False and crop size exceeds image dimensions, it will raise a CropSizeError.
  773. - If pad_if_needed is True and crop size exceeds image dimensions, the image will be padded.
  774. - For bounding boxes and keypoints, coordinates are adjusted appropriately for both padding and cropping.
  775. Examples:
  776. >>> import numpy as np
  777. >>> import albumentations as A
  778. >>> import cv2
  779. >>>
  780. >>> # Prepare sample data
  781. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  782. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  783. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  784. >>> bbox_labels = [1, 2]
  785. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  786. >>> keypoint_labels = [0, 1]
  787. >>>
  788. >>> # Example 1: Basic center crop without padding
  789. >>> transform = A.Compose([
  790. ... A.CenterCrop(height=64, width=64),
  791. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  792. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  793. >>>
  794. >>> # Apply the transform
  795. >>> transformed = transform(
  796. ... image=image,
  797. ... mask=mask,
  798. ... bboxes=bboxes,
  799. ... bbox_labels=bbox_labels,
  800. ... keypoints=keypoints,
  801. ... keypoint_labels=keypoint_labels
  802. ... )
  803. >>>
  804. >>> # Get the transformed data
  805. >>> transformed_image = transformed['image'] # Will be 64x64
  806. >>> transformed_mask = transformed['mask'] # Will be 64x64
  807. >>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
  808. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
  809. >>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to the cropped area
  810. >>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for keypoints that remain
  811. >>>
  812. >>> # Example 2: Center crop with padding when needed
  813. >>> transform_padded = A.Compose([
  814. ... A.CenterCrop(
  815. ... height=120, # Larger than original image height
  816. ... width=120, # Larger than original image width
  817. ... pad_if_needed=True,
  818. ... border_mode=cv2.BORDER_CONSTANT,
  819. ... fill=0, # Black padding for image
  820. ... fill_mask=0 # Zero padding for mask
  821. ... ),
  822. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  823. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  824. >>>
  825. >>> # Apply the padded transform
  826. >>> padded_transformed = transform_padded(
  827. ... image=image,
  828. ... mask=mask,
  829. ... bboxes=bboxes,
  830. ... bbox_labels=bbox_labels,
  831. ... keypoints=keypoints,
  832. ... keypoint_labels=keypoint_labels
  833. ... )
  834. >>>
  835. >>> # The result will be 120x120 with padding
  836. >>> padded_image = padded_transformed['image']
  837. >>> padded_mask = padded_transformed['mask']
  838. >>> padded_bboxes = padded_transformed['bboxes'] # Coordinates adjusted to the new dimensions
  839. >>> padded_keypoints = padded_transformed['keypoints'] # Coordinates adjusted to the new dimensions
  840. """
  841. class InitSchema(BaseCropAndPad.InitSchema):
  842. height: Annotated[int, Field(ge=1)]
  843. width: Annotated[int, Field(ge=1)]
  844. border_mode: Literal[
  845. cv2.BORDER_CONSTANT,
  846. cv2.BORDER_REPLICATE,
  847. cv2.BORDER_REFLECT,
  848. cv2.BORDER_WRAP,
  849. cv2.BORDER_REFLECT_101,
  850. ]
  851. fill: tuple[float, ...] | float
  852. fill_mask: tuple[float, ...] | float
  853. def __init__(
  854. self,
  855. height: int,
  856. width: int,
  857. pad_if_needed: bool = False,
  858. pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
  859. border_mode: Literal[
  860. cv2.BORDER_CONSTANT,
  861. cv2.BORDER_REPLICATE,
  862. cv2.BORDER_REFLECT,
  863. cv2.BORDER_WRAP,
  864. cv2.BORDER_REFLECT_101,
  865. ] = cv2.BORDER_CONSTANT,
  866. fill: tuple[float, ...] | float = 0.0,
  867. fill_mask: tuple[float, ...] | float = 0.0,
  868. p: float = 1.0,
  869. ):
  870. super().__init__(
  871. pad_if_needed=pad_if_needed,
  872. border_mode=border_mode,
  873. fill=fill,
  874. fill_mask=fill_mask,
  875. pad_position=pad_position,
  876. p=p,
  877. )
  878. self.height = height
  879. self.width = width
  880. def get_params_dependent_on_data(
  881. self,
  882. params: dict[str, Any],
  883. data: dict[str, Any],
  884. ) -> dict[str, Any]:
  885. """Get the parameters dependent on the data.
  886. Args:
  887. params (dict[str, Any]): The parameters of the transform.
  888. data (dict[str, Any]): The data of the transform.
  889. """
  890. image_shape = params["shape"][:2]
  891. image_height, image_width = image_shape
  892. if not self.pad_if_needed and (self.height > image_height or self.width > image_width):
  893. raise CropSizeError(
  894. f"Crop size (height, width) exceeds image dimensions (height, width):"
  895. f" {(self.height, self.width)} vs {image_shape[:2]}",
  896. )
  897. # Get padding params first if needed
  898. pad_params = self._get_pad_params(image_shape, (self.height, self.width))
  899. # If padding is needed, adjust the image shape for crop calculation
  900. if pad_params is not None:
  901. pad_top = pad_params["pad_top"]
  902. pad_bottom = pad_params["pad_bottom"]
  903. pad_left = pad_params["pad_left"]
  904. pad_right = pad_params["pad_right"]
  905. padded_height = image_height + pad_top + pad_bottom
  906. padded_width = image_width + pad_left + pad_right
  907. padded_shape = (padded_height, padded_width)
  908. # Get crop coordinates based on padded dimensions
  909. crop_coords = fcrops.get_center_crop_coords(padded_shape, (self.height, self.width))
  910. else:
  911. # Get crop coordinates based on original dimensions
  912. crop_coords = fcrops.get_center_crop_coords(image_shape, (self.height, self.width))
  913. return {
  914. "crop_coords": crop_coords,
  915. "pad_params": pad_params,
  916. }
  917. class Crop(BaseCropAndPad):
  918. """Crop a specific region from the input image.
  919. This transform crops a rectangular region from the input image, mask, bounding boxes, and keypoints
  920. based on specified coordinates. It's useful when you want to extract a specific area of interest
  921. from your inputs.
  922. Args:
  923. x_min (int): Minimum x-coordinate of the crop region (left edge). Must be >= 0. Default: 0.
  924. y_min (int): Minimum y-coordinate of the crop region (top edge). Must be >= 0. Default: 0.
  925. x_max (int): Maximum x-coordinate of the crop region (right edge). Must be > x_min. Default: 1024.
  926. y_max (int): Maximum y-coordinate of the crop region (bottom edge). Must be > y_min. Default: 1024.
  927. pad_if_needed (bool): Whether to pad if crop coordinates exceed image dimensions. Default: False.
  928. border_mode (OpenCV flag): OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
  929. fill (tuple[float, ...] | float): Padding value if border_mode is cv2.BORDER_CONSTANT. Default: 0.
  930. fill_mask (tuple[float, ...] | float): Padding value for masks. Default: 0.
  931. pad_position (Literal['center', 'top_left', 'top_right', 'bottom_left', 'bottom_right', 'random']):
  932. Position of padding. Default: 'center'.
  933. p (float): Probability of applying the transform. Default: 1.0.
  934. Targets:
  935. image, mask, bboxes, keypoints, volume, mask3d
  936. Image types:
  937. uint8, float32
  938. Note:
  939. - The crop coordinates are applied as follows: x_min <= x < x_max and y_min <= y < y_max.
  940. - If pad_if_needed is False and crop region extends beyond image boundaries, it will be clipped.
  941. - If pad_if_needed is True, image will be padded to accommodate the full crop region.
  942. - For bounding boxes and keypoints, coordinates are adjusted appropriately for both padding and cropping.
  943. Examples:
  944. >>> import numpy as np
  945. >>> import albumentations as A
  946. >>> import cv2
  947. >>>
  948. >>> # Prepare sample data
  949. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  950. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  951. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  952. >>> bbox_labels = [1, 2]
  953. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  954. >>> keypoint_labels = [0, 1]
  955. >>>
  956. >>> # Example 1: Basic crop with fixed coordinates
  957. >>> transform = A.Compose([
  958. ... A.Crop(x_min=20, y_min=20, x_max=80, y_max=80),
  959. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  960. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  961. >>>
  962. >>> # Apply the transform
  963. >>> transformed = transform(
  964. ... image=image,
  965. ... mask=mask,
  966. ... bboxes=bboxes,
  967. ... bbox_labels=bbox_labels,
  968. ... keypoints=keypoints,
  969. ... keypoint_labels=keypoint_labels
  970. ... )
  971. >>>
  972. >>> # Get the transformed data
  973. >>> transformed_image = transformed['image'] # Will be 60x60 - cropped from (20,20) to (80,80)
  974. >>> transformed_mask = transformed['mask'] # Will be 60x60
  975. >>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to the cropped area
  976. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for boxes that remain after cropping
  977. >>>
  978. >>> # Example 2: Crop with padding when the crop region extends beyond image dimensions
  979. >>> transform_padded = A.Compose([
  980. ... A.Crop(
  981. ... x_min=50, y_min=50, x_max=150, y_max=150, # Extends beyond the 100x100 image
  982. ... pad_if_needed=True,
  983. ... border_mode=cv2.BORDER_CONSTANT,
  984. ... fill=0, # Black padding for image
  985. ... fill_mask=0 # Zero padding for mask
  986. ... ),
  987. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  988. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  989. >>>
  990. >>> # Apply the padded transform
  991. >>> padded_transformed = transform_padded(
  992. ... image=image,
  993. ... mask=mask,
  994. ... bboxes=bboxes,
  995. ... bbox_labels=bbox_labels,
  996. ... keypoints=keypoints,
  997. ... keypoint_labels=keypoint_labels
  998. ... )
  999. >>>
  1000. >>> # The result will be 100x100 (50:150, 50:150) with padding on right and bottom
  1001. >>> padded_image = padded_transformed['image'] # 100x100 with 50 pixels of original + 50 pixels of padding
  1002. >>> padded_mask = padded_transformed['mask']
  1003. >>> padded_bboxes = padded_transformed['bboxes'] # Coordinates adjusted to the cropped and padded area
  1004. >>>
  1005. >>> # Example 3: Crop with reflection padding and custom position
  1006. >>> transform_reflect = A.Compose([
  1007. ... A.Crop(
  1008. ... x_min=-20, y_min=-20, x_max=80, y_max=80, # Negative coordinates (outside image)
  1009. ... pad_if_needed=True,
  1010. ... border_mode=cv2.BORDER_REFLECT_101, # Reflect image for padding
  1011. ... pad_position="top_left" # Apply padding at top-left
  1012. ... ),
  1013. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
  1014. >>>
  1015. >>> # The resulting crop will use reflection padding for the negative coordinates
  1016. >>> reflect_result = transform_reflect(
  1017. ... image=image,
  1018. ... bboxes=bboxes,
  1019. ... bbox_labels=bbox_labels
  1020. ... )
  1021. """
  1022. class InitSchema(BaseCropAndPad.InitSchema):
  1023. x_min: Annotated[int, Field(ge=0)]
  1024. y_min: Annotated[int, Field(ge=0)]
  1025. x_max: Annotated[int, Field(gt=0)]
  1026. y_max: Annotated[int, Field(gt=0)]
  1027. border_mode: Literal[
  1028. cv2.BORDER_CONSTANT,
  1029. cv2.BORDER_REPLICATE,
  1030. cv2.BORDER_REFLECT,
  1031. cv2.BORDER_WRAP,
  1032. cv2.BORDER_REFLECT_101,
  1033. ]
  1034. fill: tuple[float, ...] | float
  1035. fill_mask: tuple[float, ...] | float
  1036. @model_validator(mode="after")
  1037. def _validate_coordinates(self) -> Self:
  1038. if not self.x_min < self.x_max:
  1039. msg = "x_max must be greater than x_min"
  1040. raise ValueError(msg)
  1041. if not self.y_min < self.y_max:
  1042. msg = "y_max must be greater than y_min"
  1043. raise ValueError(msg)
  1044. return self
  1045. def __init__(
  1046. self,
  1047. x_min: int = 0,
  1048. y_min: int = 0,
  1049. x_max: int = 1024,
  1050. y_max: int = 1024,
  1051. pad_if_needed: bool = False,
  1052. pad_position: Literal["center", "top_left", "top_right", "bottom_left", "bottom_right", "random"] = "center",
  1053. border_mode: Literal[
  1054. cv2.BORDER_CONSTANT,
  1055. cv2.BORDER_REPLICATE,
  1056. cv2.BORDER_REFLECT,
  1057. cv2.BORDER_WRAP,
  1058. cv2.BORDER_REFLECT_101,
  1059. ] = cv2.BORDER_CONSTANT,
  1060. fill: tuple[float, ...] | float = 0,
  1061. fill_mask: tuple[float, ...] | float = 0,
  1062. p: float = 1.0,
  1063. ):
  1064. super().__init__(
  1065. pad_if_needed=pad_if_needed,
  1066. border_mode=border_mode,
  1067. fill=fill,
  1068. fill_mask=fill_mask,
  1069. pad_position=pad_position,
  1070. p=p,
  1071. )
  1072. self.x_min = x_min
  1073. self.y_min = y_min
  1074. self.x_max = x_max
  1075. self.y_max = y_max
  1076. # New helper function for computing minimum padding
  1077. def _compute_min_padding(self, image_height: int, image_width: int) -> tuple[int, int, int, int]:
  1078. pad_top = 0
  1079. pad_bottom = max(0, self.y_max - image_height)
  1080. pad_left = 0
  1081. pad_right = max(0, self.x_max - image_width)
  1082. return pad_top, pad_bottom, pad_left, pad_right
  1083. # New helper function for distributing and adjusting padding
  1084. def _compute_adjusted_padding(self, pad_top: int, pad_bottom: int, pad_left: int, pad_right: int) -> dict[str, int]:
  1085. delta_h = pad_top + pad_bottom
  1086. delta_w = pad_left + pad_right
  1087. pad_top_dist = delta_h // 2
  1088. pad_bottom_dist = delta_h - pad_top_dist
  1089. pad_left_dist = delta_w // 2
  1090. pad_right_dist = delta_w - pad_left_dist
  1091. (pad_top_adj, pad_bottom_adj, pad_left_adj, pad_right_adj) = fgeometric.adjust_padding_by_position(
  1092. h_top=pad_top_dist,
  1093. h_bottom=pad_bottom_dist,
  1094. w_left=pad_left_dist,
  1095. w_right=pad_right_dist,
  1096. position=self.pad_position,
  1097. py_random=self.py_random,
  1098. )
  1099. final_top = max(pad_top_adj, pad_top)
  1100. final_bottom = max(pad_bottom_adj, pad_bottom)
  1101. final_left = max(pad_left_adj, pad_left)
  1102. final_right = max(pad_right_adj, pad_right)
  1103. return {
  1104. "pad_top": final_top,
  1105. "pad_bottom": final_bottom,
  1106. "pad_left": final_left,
  1107. "pad_right": final_right,
  1108. }
  1109. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  1110. """Get parameters for crop.
  1111. Args:
  1112. params (dict): Dictionary with parameters for crop.
  1113. data (dict): Dictionary with data.
  1114. Returns:
  1115. dict: Dictionary with parameters for crop.
  1116. """
  1117. image_shape = params["shape"][:2]
  1118. image_height, image_width = image_shape
  1119. if not self.pad_if_needed:
  1120. return {"crop_coords": (self.x_min, self.y_min, self.x_max, self.y_max), "pad_params": None}
  1121. pad_top, pad_bottom, pad_left, pad_right = self._compute_min_padding(image_height, image_width)
  1122. pad_params = None
  1123. if any([pad_top, pad_bottom, pad_left, pad_right]):
  1124. pad_params = self._compute_adjusted_padding(pad_top, pad_bottom, pad_left, pad_right)
  1125. return {"crop_coords": (self.x_min, self.y_min, self.x_max, self.y_max), "pad_params": pad_params}
  1126. class CropNonEmptyMaskIfExists(BaseCrop):
  1127. """Crop area with mask if mask is non-empty, else make random crop.
  1128. This transform attempts to crop a region containing a mask (non-zero pixels). If the mask is empty or not provided,
  1129. it falls back to a random crop. This is particularly useful for segmentation tasks where you want to focus on
  1130. regions of interest defined by the mask.
  1131. Args:
  1132. height (int): Vertical size of crop in pixels. Must be > 0.
  1133. width (int): Horizontal size of crop in pixels. Must be > 0.
  1134. ignore_values (list of int, optional): Values to ignore in mask, `0` values are always ignored.
  1135. For example, if background value is 5, set `ignore_values=[5]` to ignore it. Default: None.
  1136. ignore_channels (list of int, optional): Channels to ignore in mask.
  1137. For example, if background is the first channel, set `ignore_channels=[0]` to ignore it. Default: None.
  1138. p (float): Probability of applying the transform. Default: 1.0.
  1139. Targets:
  1140. image, mask, bboxes, keypoints, volume, mask3d
  1141. Image types:
  1142. uint8, float32
  1143. Note:
  1144. - If a mask is provided, the transform will try to crop an area containing non-zero (or non-ignored) pixels.
  1145. - If no suitable area is found in the mask or no mask is provided, it will perform a random crop.
  1146. - The crop size (height, width) must not exceed the original image dimensions.
  1147. - Bounding boxes and keypoints are also cropped along with the image and mask.
  1148. Raises:
  1149. ValueError: If the specified crop size is larger than the input image dimensions.
  1150. Example:
  1151. >>> import numpy as np
  1152. >>> import albumentations as A
  1153. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1154. >>> mask = np.zeros((100, 100), dtype=np.uint8)
  1155. >>> mask[25:75, 25:75] = 1 # Create a non-empty region in the mask
  1156. >>> transform = A.Compose([
  1157. ... A.CropNonEmptyMaskIfExists(height=50, width=50, p=1.0),
  1158. ... ])
  1159. >>> transformed = transform(image=image, mask=mask)
  1160. >>> transformed_image = transformed['image']
  1161. >>> transformed_mask = transformed['mask']
  1162. # The resulting crop will likely include part of the non-zero region in the mask
  1163. Raises:
  1164. ValueError: If the specified crop size is larger than the input image dimensions.
  1165. Examples:
  1166. >>> import numpy as np
  1167. >>> import albumentations as A
  1168. >>>
  1169. >>> # Prepare sample data
  1170. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1171. >>> # Create a mask with non-empty region in the center
  1172. >>> mask = np.zeros((100, 100), dtype=np.uint8)
  1173. >>> mask[25:75, 25:75] = 1 # Create a non-empty region in the mask
  1174. >>>
  1175. >>> # Create bounding boxes and keypoints in the mask region
  1176. >>> bboxes = np.array([
  1177. ... [20, 20, 60, 60], # Box overlapping with non-empty region
  1178. ... [30, 30, 70, 70], # Box mostly inside non-empty region
  1179. ... ], dtype=np.float32)
  1180. >>> bbox_labels = ['cat', 'dog']
  1181. >>>
  1182. >>> # Add some keypoints inside mask region
  1183. >>> keypoints = np.array([
  1184. ... [40, 40], # Inside non-empty region
  1185. ... [60, 60], # At edge of non-empty region
  1186. ... [90, 90] # Outside non-empty region
  1187. ... ], dtype=np.float32)
  1188. >>> keypoint_labels = ['eye', 'nose', 'ear']
  1189. >>>
  1190. >>> # Define transform that will crop around the non-empty mask region
  1191. >>> transform = A.Compose([
  1192. ... A.CropNonEmptyMaskIfExists(
  1193. ... height=50,
  1194. ... width=50,
  1195. ... ignore_values=None,
  1196. ... ignore_channels=None,
  1197. ... p=1.0
  1198. ... ),
  1199. ... ], bbox_params=A.BboxParams(
  1200. ... format='pascal_voc',
  1201. ... label_fields=['bbox_labels']
  1202. ... ), keypoint_params=A.KeypointParams(
  1203. ... format='xy',
  1204. ... label_fields=['keypoint_labels']
  1205. ... ))
  1206. >>>
  1207. >>> # Apply the transform
  1208. >>> transformed = transform(
  1209. ... image=image,
  1210. ... mask=mask,
  1211. ... bboxes=bboxes,
  1212. ... bbox_labels=bbox_labels,
  1213. ... keypoints=keypoints,
  1214. ... keypoint_labels=keypoint_labels
  1215. ... )
  1216. >>>
  1217. >>> # Get the transformed data
  1218. >>> transformed_image = transformed['image'] # 50x50 image centered on mask region
  1219. >>> transformed_mask = transformed['mask'] # 50x50 mask showing part of non-empty region
  1220. >>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to new coordinates
  1221. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels preserved for visible boxes
  1222. >>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to new coordinates
  1223. >>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for visible keypoints
  1224. """
  class InitSchema(BaseCrop.InitSchema):
      # Constructor-argument schema; extends the schema inherited from BaseCrop.InitSchema.
      # Mask values that _preprocess_mask rewrites to 0 before the foreground search; None disables this.
      ignore_values: list[int] | None
      # Channel indices that _preprocess_mask removes from multi-channel masks; None keeps every channel.
      ignore_channels: list[int] | None
      # Crop height in pixels, constrained to be >= 1.
      height: Annotated[int, Field(ge=1)]
      # Crop width in pixels, constrained to be >= 1.
      width: Annotated[int, Field(ge=1)]
  def __init__(
      self,
      height: int,
      width: int,
      ignore_values: list[int] | None = None,
      ignore_channels: list[int] | None = None,
      p: float = 1.0,
  ):
      """Remember the crop size and the mask-filtering options.

      Args:
          height (int): Vertical size of the crop in pixels.
          width (int): Horizontal size of the crop in pixels.
          ignore_values (list[int] | None): Mask values to treat as background. Default: None.
          ignore_channels (list[int] | None): Mask channels to leave out of the search. Default: None.
          p (float): Probability of applying the transform. Default: 1.0.
      """
      super().__init__(p=p)
      # Crop geometry.
      self.height, self.width = height, width
      # Mask filtering, consumed by _preprocess_mask.
      self.ignore_values, self.ignore_channels = ignore_values, ignore_channels
  def _preprocess_mask(self, mask: np.ndarray) -> np.ndarray:
      """Validate the crop size against the mask and strip ignored values and channels.

      Args:
          mask (np.ndarray): Mask whose first two dimensions are (height, width).

      Returns:
          np.ndarray: The mask with every value listed in `ignore_values` replaced by 0 and,
              when the mask has NUM_MULTI_CHANNEL_DIMENSIONS dimensions, the channels listed in
              `ignore_channels` removed. The input array itself is returned when neither
              option applies.

      Raises:
          ValueError: If the configured crop is taller or wider than the mask.
      """
      mask_height, mask_width = mask.shape[:2]

      # Fail fast: none of the filtering below can make an oversized crop valid.
      if self.height > mask_height or self.width > mask_width:
          raise ValueError(
              f"Crop size ({self.height},{self.width}) is larger than image ({mask_height},{mask_width})",
          )

      if self.ignore_values is not None:
          ignore_values_np = np.array(self.ignore_values)
          mask = np.where(np.isin(mask, ignore_values_np), 0, mask)

      if self.ignore_channels is not None and mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS:
          ignored = set(self.ignore_channels)
          # Explicit integer dtype: if every channel is ignored the list is empty, and the
          # default empty array is float64, which np.take refuses to use as an index.
          kept_channels = np.array(
              [ch for ch in range(mask.shape[-1]) if ch not in ignored],
              dtype=np.intp,
          )
          mask = np.take(mask, kept_channels, axis=-1)

      return mask
  def get_params_dependent_on_data(
      self,
      params: dict[str, Any],
      data: dict[str, Any],
  ) -> dict[str, Any]:
      """Pick crop coordinates that contain a foreground mask pixel when one exists.

      The mask comes from `data["mask"]`, or otherwise from the union of the non-zero
      regions of all arrays in `data["masks"]`. If the (preprocessed) mask has any non-zero
      pixel, one such pixel is drawn at random and the crop window is placed at a random
      offset that keeps the pixel inside it, clipped to the mask bounds. Otherwise the
      crop position is drawn uniformly.

      Args:
          params (dict[str, Any]): The parameters of the transform (not used here).
          data (dict[str, Any]): The data of the transform; must hold "mask" or a
              non-empty "masks".

      Returns:
          dict[str, Any]: `{"crop_coords": (x_min, y_min, x_max, y_max)}`.

      Raises:
          RuntimeError: If `data` contains neither "mask" nor a non-empty "masks".
          ValueError: Propagated from `_preprocess_mask` when the crop is larger than the mask.
      """
      if "mask" in data:
          mask = self._preprocess_mask(data["mask"])
      elif "masks" in data and len(data["masks"]):
          masks = data["masks"]
          # Combine as booleans rather than with in-place `|=` on the raw arrays: bitwise OR
          # is undefined for float masks and cannot be done in place across differing
          # integer dtypes. This also leaves the caller's arrays untouched.
          mask = self._preprocess_mask(masks[0]) != 0
          for m in masks[1:]:
              mask = mask | (self._preprocess_mask(m) != 0)
      else:
          msg = "Can not find mask for CropNonEmptyMaskIfExists"
          raise RuntimeError(msg)

      mask_height, mask_width = mask.shape[:2]

      if mask.any():
          # Reduce channels with `any` instead of `sum`: a sum of signed values can cancel
          # to zero and leave no candidate pixel even though the mask is non-empty.
          foreground = mask.any(axis=-1) if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS else mask
          non_zero_yx = np.argwhere(foreground)
          y, x = self.py_random.choice(non_zero_yx)

          # Shift the window by a random amount so the chosen pixel lands anywhere inside it
          # (not necessarily at the centre), then keep the window within the mask.
          x_min = x - self.py_random.randint(0, self.width - 1)
          y_min = y - self.py_random.randint(0, self.height - 1)
          x_min = int(np.clip(x_min, 0, mask_width - self.width))
          y_min = int(np.clip(y_min, 0, mask_height - self.height))
      else:
          # No foreground at all: fall back to a uniformly random crop position.
          x_min = self.py_random.randint(0, mask_width - self.width)
          y_min = self.py_random.randint(0, mask_height - self.height)

      x_max = x_min + self.width
      y_max = y_min + self.height

      return {"crop_coords": (x_min, y_min, x_max, y_max)}
  class BaseRandomSizedCropInitSchema(BaseTransformInitSchema):
      # Constructor-argument schema used as the base of _BaseRandomSizedCrop.InitSchema.
      # Output size as (height, width); checked by check_range_bounds(1, None)
      # (presumably requires both entries to be >= 1 -- confirm against the helper).
      size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
  class _BaseRandomSizedCrop(DualTransform):
      """Base class for transforms that crop an image randomly and resize it to a specific size.

      This abstract class provides the foundation for RandomSizedCrop and RandomResizedCrop transforms.
      It handles cropping and resizing for different data types (image, mask, bboxes, keypoints) while
      maintaining their spatial relationships.

      Child classes must implement the `get_params_dependent_on_data` method to determine how the
      crop coordinates are selected according to transform-specific parameters and logic.

      Args:
          size (tuple[int, int]): Target size (height, width) after cropping and resizing.
          interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm
              for image resizing. Default: cv2.INTER_LINEAR.
          mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation
              algorithm for mask resizing. Default: cv2.INTER_NEAREST.
          area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
              for downscaling. Options:
              - None: No automatic interpolation selection, always use the specified interpolation method
              - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
              - "image_mask": Use INTER_AREA when downscaling both images and masks
              Default: None.
          p (float): Probability of applying the transform. Default: 1.0.

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Note:
          This class is not meant to be used directly. Instead, use derived transforms
          like RandomSizedCrop or RandomResizedCrop that implement specific crop selection
          strategies.
          When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
          downscaling (when the crop is larger than the target size), which provides better quality for size reduction.
          Upscaling always uses the configured interpolation. 3D masks are resized with the mask
          interpolation, exactly like 2D masks.

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> import cv2
          >>>
          >>> # Example of a custom transform that inherits from _BaseRandomSizedCrop
          >>> class CustomRandomCrop(_BaseRandomSizedCrop):
          ...     def __init__(
          ...         self,
          ...         size=(224, 224),
          ...         custom_parameter=0.5,
          ...         interpolation=cv2.INTER_LINEAR,
          ...         mask_interpolation=cv2.INTER_NEAREST,
          ...         area_for_downscale="image",
          ...         p=1.0
          ...     ):
          ...         super().__init__(
          ...             size=size,
          ...             interpolation=interpolation,
          ...             mask_interpolation=mask_interpolation,
          ...             area_for_downscale=area_for_downscale,
          ...             p=p,
          ...         )
          ...         self.custom_parameter = custom_parameter
          ...
          ...     def get_params_dependent_on_data(self, params, data):
          ...         # Custom logic to select crop coordinates
          ...         image_height, image_width = params["shape"][:2]
          ...
          ...         # Simple example: calculate crop size based on custom_parameter
          ...         crop_height = int(image_height * self.custom_parameter)
          ...         crop_width = int(image_width * self.custom_parameter)
          ...
          ...         # Random position (py_random.randint includes both endpoints)
          ...         y1 = self.py_random.randint(0, image_height - crop_height)
          ...         x1 = self.py_random.randint(0, image_width - crop_width)
          ...         y2 = y1 + crop_height
          ...         x2 = x1 + crop_width
          ...
          ...         return {"crop_coords": (x1, y1, x2, y2)}
          >>>
          >>> # Prepare sample data
          >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
          >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
          >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
          >>> bbox_labels = [1, 2]
          >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
          >>> keypoint_labels = [0, 1]
          >>>
          >>> # Create a pipeline with our custom transform
          >>> transform = A.Compose(
          ...     [CustomRandomCrop(size=(64, 64), custom_parameter=0.6, area_for_downscale="image")],
          ...     bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
          ...     keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels'])
          ... )
          >>>
          >>> # Apply the transform
          >>> transformed = transform(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # Get the transformed data
          >>> transformed_image = transformed['image']  # Will be 64x64
          >>> transformed_mask = transformed['mask']  # Will be 64x64
          >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new dimensions
          >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for bboxes that remain after cropping
          >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new dimensions
          >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for keypoints that remain

      """

      class InitSchema(BaseRandomSizedCropInitSchema):
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          area_for_downscale: Literal[None, "image", "image_mask"]

      def __init__(
          self,
          size: tuple[int, int],
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_LINEAR,
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_NEAREST,
          area_for_downscale: Literal[None, "image", "image_mask"] = None,
          p: float = 1.0,
      ):
          super().__init__(p=p)
          self.size = size
          self.interpolation = interpolation
          self.mask_interpolation = mask_interpolation
          self.area_for_downscale = area_for_downscale

      def _get_interpolation_for_resize(self, crop_shape: tuple[int, int], target_type: str) -> int:
          """Get the appropriate interpolation method for resizing.

          INTER_AREA is selected only when the crop is larger than the target size along at
          least one axis AND `area_for_downscale` covers the given target type. In every other
          case the configured interpolation for that target type is returned.

          Args:
              crop_shape: Shape of the crop (height, width)
              target_type: Either "image" or "mask" to determine base interpolation

          Returns:
              OpenCV interpolation flag

          """
          crop_height, crop_width = crop_shape
          target_height, target_width = self.size

          # Determine if this is downscaling
          is_downscale = (crop_height > target_height) or (crop_width > target_width)

          # Whether the INTER_AREA override is enabled for this target type
          area_enabled = (target_type == "image" and self.area_for_downscale in ("image", "image_mask")) or (
              target_type == "mask" and self.area_for_downscale == "image_mask"
          )

          # The downscale test must gate both target types; INTER_AREA is never used for upscaling
          if is_downscale and area_enabled:
              return cv2.INTER_AREA

          # Get base interpolation
          return self.interpolation if target_type == "image" else self.mask_interpolation

      def _crop_and_resize_stack(
          self,
          stack: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          target_type: str,
      ) -> np.ndarray:
          """Crop a stack of slices in the YX plane and resize every slice to `self.size`.

          Args:
              stack (np.ndarray): Array with shape (D, H, W) or (D, H, W, C).
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              target_type (str): "image" or "mask"; selects the interpolation.

          Returns:
              np.ndarray: The resized slices stacked along the first axis.

          """
          # Crop first so that the resize works on less data
          crop = fcrops.volume_crop_yx(stack, *crop_coords)
          interpolation = self._get_interpolation_for_resize(crop.shape[1:3], target_type)
          return np.stack([fgeometric.resize(frame, self.size, interpolation) for frame in crop])

      def apply(
          self,
          img: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop to the image and resize the result to `self.size`.

          Args:
              img (np.ndarray): The image to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          crop = fcrops.crop(img, *crop_coords)
          interpolation = self._get_interpolation_for_resize(crop.shape[:2], "image")
          return fgeometric.resize(crop, self.size, interpolation)

      def apply_to_mask(
          self,
          mask: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop to the mask and resize the result to `self.size`.

          Args:
              mask (np.ndarray): The mask to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          crop = fcrops.crop(mask, *crop_coords)
          interpolation = self._get_interpolation_for_resize(crop.shape[:2], "mask")
          return fgeometric.resize(crop, self.size, interpolation)

      def apply_to_bboxes(
          self,
          bboxes: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop to the bounding boxes.

          Args:
              bboxes (np.ndarray): The bounding boxes to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters; `params["shape"]` is passed to the crop helper.

          """
          return fcrops.crop_bboxes_by_coords(bboxes, crop_coords, params["shape"])

      def apply_to_keypoints(
          self,
          keypoints: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop to the keypoints and rescale them to the target size.

          Args:
              keypoints (np.ndarray): The keypoints to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          # First, crop the keypoints
          cropped_keypoints = fcrops.crop_keypoints_by_coords(keypoints, crop_coords)

          # Calculate the dimensions of the crop
          crop_height = crop_coords[3] - crop_coords[1]
          crop_width = crop_coords[2] - crop_coords[0]

          # Calculate scaling factors (self.size is (height, width))
          scale_x = self.size[1] / crop_width
          scale_y = self.size[0] / crop_height

          # Scale the cropped keypoints
          return fgeometric.keypoints_scale(cropped_keypoints, scale_x, scale_y)

      def apply_to_images(
          self,
          images: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop and resize to a volume/images.

          The stack is cropped first (reducing data size) and each slice is then resized
          with the image interpolation.

          Args:
              images (np.ndarray): The volume/images to crop and resize with shape (D, H, W) or (D, H, W, C).
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          return self._crop_and_resize_stack(images, crop_coords, "image")

      def apply_to_volume(
          self,
          volume: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop and resize to a volume.

          Args:
              volume (np.ndarray): The volume to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          return self.apply_to_images(volume, crop_coords, **params)

      def apply_to_mask3d(
          self,
          mask3d: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Apply the crop and resize to a mask3d.

          Uses the mask interpolation (as `apply_to_mask` does) so that label values are not
          blended by the image interpolation.

          Args:
              mask3d (np.ndarray): The mask3d to crop.
              crop_coords (tuple[int, int, int, int]): The coordinates of the crop.
              **params (Any): Additional parameters.

          """
          return self._crop_and_resize_stack(mask3d, crop_coords, "mask")
  class RandomSizedCrop(_BaseRandomSizedCrop):
      """Crop a random part of the input and rescale it to a specific size.

      This transform first crops a random portion of the input and then resizes it to a specified size.
      The size of the random crop is controlled by the 'min_max_height' parameter.

      Args:
          min_max_height (tuple[int, int]): Minimum and maximum height of the crop in pixels.
          size (tuple[int, int]): Target size for the output image, i.e. (height, width) after crop and resize.
          w2h_ratio (float): Aspect ratio (width/height) of crop. Default: 1.0
          interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
              cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_LINEAR_EXACT,
              cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
              Default: cv2.INTER_LINEAR.
          mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
              Should be one of: cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR,
              cv2.INTER_LINEAR_EXACT, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
              Default: cv2.INTER_NEAREST.
          area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
              for downscaling. Options:
              - None: No automatic interpolation selection, always use the specified interpolation method
              - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
              - "image_mask": Use INTER_AREA when downscaling both images and masks
              Default: None.
          p (float): Probability of applying the transform. Default: 1.0

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Note:
          - The crop size is randomly selected for each execution within the range specified by 'min_max_height'.
          - The aspect ratio of the crop is determined by the 'w2h_ratio' parameter.
          - The crop width is truncated to an integer and is never smaller than 1 pixel.
          - After cropping, the result is resized to the specified 'size'.
          - Bounding boxes that end up fully outside the cropped area will be removed.
          - Keypoints that end up outside the cropped area will be removed.
          - This transform differs from RandomResizedCrop in that it allows more control over the crop size
            through the 'min_max_height' parameter, rather than using a scale parameter.
          - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
            downscaling (when the crop is larger than the target size), which provides better quality for size reduction.

      Mathematical Details:
          1. A random crop height h is sampled from the range [min_max_height[0], min_max_height[1]].
          2. The crop width w is calculated as: w = max(1, int(h * w2h_ratio))
          3. A random location for the crop is selected within the input image.
          4. The image is cropped to the size (h, w).
          5. The crop is then resized to the specified 'size'.

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> import cv2
          >>>
          >>> # Prepare sample data
          >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
          >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
          >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
          >>> bbox_labels = [1, 2]
          >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
          >>> keypoint_labels = [0, 1]
          >>>
          >>> # Define transform with parameters as tuples
          >>> transform = A.Compose([
          ...     A.RandomSizedCrop(
          ...         min_max_height=(50, 80),
          ...         size=(64, 64),
          ...         w2h_ratio=1.0,
          ...         interpolation=cv2.INTER_LINEAR,
          ...         mask_interpolation=cv2.INTER_NEAREST,
          ...         area_for_downscale="image",  # Use INTER_AREA for image downscaling
          ...         p=1.0
          ...     ),
          ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
          ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
          >>>
          >>> # Apply the transform
          >>> transformed = transform(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # Get the transformed data
          >>> transformed_image = transformed['image']  # Shape: (64, 64, 3)
          >>> transformed_mask = transformed['mask']  # Shape: (64, 64)
          >>> transformed_bboxes = transformed['bboxes']  # Bounding boxes adjusted to new crop and size
          >>> transformed_bbox_labels = transformed['bbox_labels']  # Labels for the preserved bboxes
          >>> transformed_keypoints = transformed['keypoints']  # Keypoints adjusted to new crop and size
          >>> transformed_keypoint_labels = transformed['keypoint_labels']  # Labels for the preserved keypoints

      """

      _targets = ALL_TARGETS

      class InitSchema(BaseTransformInitSchema):
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          min_max_height: OnePlusIntRangeType
          w2h_ratio: Annotated[float, Field(gt=0)]
          size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
          area_for_downscale: Literal[None, "image", "image_mask"]

      def __init__(
          self,
          min_max_height: tuple[int, int],
          size: tuple[int, int],
          w2h_ratio: float = 1.0,
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_LINEAR,
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_NEAREST,
          area_for_downscale: Literal[None, "image", "image_mask"] = None,
          p: float = 1.0,
      ):
          super().__init__(
              size=size,
              interpolation=interpolation,
              mask_interpolation=mask_interpolation,
              area_for_downscale=area_for_downscale,
              p=p,
          )
          self.min_max_height = min_max_height
          self.w2h_ratio = w2h_ratio

      def get_params_dependent_on_data(
          self,
          params: dict[str, Any],
          data: dict[str, Any],
      ) -> dict[str, tuple[int, int, int, int]]:
          """Sample the crop size and position for one call.

          Args:
              params (dict[str, Any]): The parameters of the transform; `params["shape"]` supplies
                  the input height and width.
              data (dict[str, Any]): The data of the transform (not used here).

          Returns:
              dict[str, tuple[int, int, int, int]]: `{"crop_coords": ...}` as produced by
                  `fcrops.get_crop_coords`.

          """
          image_shape = params["shape"][:2]

          crop_height = self.py_random.randint(*self.min_max_height)
          # int() truncates, so a small height combined with a small ratio would give a width of 0,
          # i.e. an empty crop and a division by zero when keypoints are rescaled. Keep >= 1 pixel.
          crop_width = max(1, int(crop_height * self.w2h_ratio))

          crop_shape = (crop_height, crop_width)

          # Relative start positions in [0, 1); drawn in this order (height first, then width)
          h_start = self.py_random.random()
          w_start = self.py_random.random()

          crop_coords = fcrops.get_crop_coords(image_shape, crop_shape, h_start, w_start)

          return {"crop_coords": crop_coords}
  1742. class RandomResizedCrop(_BaseRandomSizedCrop):
  1743. """Crop a random part of the input and rescale it to a specified size.
  1744. This transform first crops a random portion of the input image (or mask, bounding boxes, keypoints)
  1745. and then resizes the crop to a specified size. It's particularly useful for training neural networks
  1746. on images of varying sizes and aspect ratios.
  1747. Args:
  1748. size (tuple[int, int]): Target size for the output image, i.e. (height, width) after crop and resize.
  1749. scale (tuple[float, float]): Range of the random size of the crop relative to the input size.
  1750. For example, (0.08, 1.0) means the crop size will be between 8% and 100% of the input size.
  1751. Default: (0.08, 1.0)
  1752. ratio (tuple[float, float]): Range of aspect ratios of the random crop.
  1753. For example, (0.75, 1.3333) allows crop aspect ratios from 3:4 to 4:3.
  1754. Default: (0.75, 1.3333333333333333)
  1755. interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
  1756. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  1757. Default: cv2.INTER_LINEAR
  1758. mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
  1759. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  1760. Default: cv2.INTER_NEAREST
  1761. area_for_downscale (Literal[None, "image", "image_mask"]): Controls automatic use of INTER_AREA interpolation
  1762. for downscaling. Options:
  1763. - None: No automatic interpolation selection, always use the specified interpolation method
  1764. - "image": Use INTER_AREA when downscaling images, retain specified interpolation for upscaling and masks
  1765. - "image_mask": Use INTER_AREA when downscaling both images and masks
  1766. Default: None.
  1767. p (float): Probability of applying the transform. Default: 1.0
  1768. Targets:
  1769. image, mask, bboxes, keypoints, volume, mask3d
  1770. Image types:
  1771. uint8, float32
  1772. Note:
  1773. - This transform attempts to crop a random area with an aspect ratio and relative size
  1774. specified by 'ratio' and 'scale' parameters. If it fails to find a suitable crop after
  1775. 10 attempts, it will return a crop from the center of the image.
  1776. - The crop's aspect ratio is defined as width / height.
  1777. - Bounding boxes that end up fully outside the cropped area will be removed.
  1778. - Keypoints that end up outside the cropped area will be removed.
  1779. - After cropping, the result is resized to the specified size.
  1780. - When area_for_downscale is set, INTER_AREA interpolation will be used automatically for
  1781. downscaling (when the crop is larger than the target size), which provides better quality for size reduction.
  1782. Mathematical Details:
  1783. 1. A target area A is sampled from the range [scale[0] * input_area, scale[1] * input_area].
  1784. 2. A target aspect ratio r is sampled from the range [ratio[0], ratio[1]].
  1785. 3. The crop width and height are computed as:
  1786. w = sqrt(A * r)
  1787. h = sqrt(A / r)
  1788. 4. If w and h are within the input image dimensions, the crop is accepted.
  1789. Otherwise, steps 1-3 are repeated (up to 10 times).
  1790. 5. If no valid crop is found after 10 attempts, a centered crop is taken.
  1791. 6. The crop is then resized to the specified size.
  1792. Examples:
  1793. >>> import numpy as np
  1794. >>> import albumentations as A
  1795. >>> import cv2
  1796. >>>
  1797. >>> # Prepare sample data
  1798. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1799. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  1800. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  1801. >>> bbox_labels = [1, 2]
  1802. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  1803. >>> keypoint_labels = [0, 1]
  1804. >>>
  1805. >>> # Define transform with parameters as tuples
  1806. >>> transform = A.Compose([
  1807. ... A.RandomResizedCrop(
  1808. ... size=(64, 64),
  1809. ... scale=(0.5, 0.9), # Crop size will be 50-90% of original image
  1810. ... ratio=(0.75, 1.33), # Aspect ratio will vary from 3:4 to 4:3
  1811. ... interpolation=cv2.INTER_LINEAR,
  1812. ... mask_interpolation=cv2.INTER_NEAREST,
  1813. ... area_for_downscale="image", # Use INTER_AREA for image downscaling
  1814. ... p=1.0
  1815. ... ),
  1816. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  1817. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  1818. >>>
  1819. >>> # Apply the transform
  1820. >>> transformed = transform(
  1821. ... image=image,
  1822. ... mask=mask,
  1823. ... bboxes=bboxes,
  1824. ... bbox_labels=bbox_labels,
  1825. ... keypoints=keypoints,
  1826. ... keypoint_labels=keypoint_labels
  1827. ... )
  1828. >>>
  1829. >>> # Get the transformed data
  1830. >>> transformed_image = transformed['image'] # Shape: (64, 64, 3)
  1831. >>> transformed_mask = transformed['mask'] # Shape: (64, 64)
  1832. >>> transformed_bboxes = transformed['bboxes'] # Bounding boxes adjusted to new crop and size
  1833. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for the preserved bboxes
  1834. >>> transformed_keypoints = transformed['keypoints'] # Keypoints adjusted to new crop and size
  1835. >>> transformed_keypoint_labels = transformed['keypoint_labels'] # Labels for the preserved keypoints
  1836. """
  1837. _targets = ALL_TARGETS
  class InitSchema(BaseTransformInitSchema):
      # Declarative validation schema for the constructor arguments of the enclosing transform.
      # NOTE(review): `check_range_bounds` and `nondecreasing` are defined outside this chunk;
      # the bounds mentioned below are read off their names and arguments — confirm.

      # (min, max) fraction of the source image area that a sampled crop may cover;
      # presumably both values must lie in [0, 1] with min <= max.
      scale: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1)), AfterValidator(nondecreasing)]
      # (min, max) width/height aspect ratio of a sampled crop;
      # presumably bounded below by 0 with min <= max.
      ratio: Annotated[
          tuple[float, float],
          AfterValidator(check_range_bounds(0, None)),
          AfterValidator(nondecreasing),
      ]
      # Output size forwarded to the parent constructor; presumably both entries must be >= 1.
      size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
      # OpenCV interpolation flags accepted for the `interpolation` argument.
      interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ]
      # OpenCV interpolation flags accepted for the `mask_interpolation` argument.
      mask_interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ]
      # Forwarded unchanged to the parent constructor; only these three values are accepted.
      area_for_downscale: Literal[None, "image", "image_mask"]
  def __init__(
      self,
      size: tuple[int, int],
      scale: tuple[float, float] = (0.08, 1.0),
      ratio: tuple[float, float] = (0.75, 1.3333333333333333),
      interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ] = cv2.INTER_LINEAR,
      mask_interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ] = cv2.INTER_NEAREST,
      area_for_downscale: Literal[None, "image", "image_mask"] = None,
      p: float = 1.0,
  ):
      """Store the crop sampling ranges and hand the resize options to the parent class.

      Args:
          size (tuple[int, int]): Output size, forwarded to the parent constructor.
          scale (tuple[float, float]): (min, max) fraction of the image area used when sampling a crop.
          ratio (tuple[float, float]): (min, max) width/height aspect ratio used when sampling a crop.
          interpolation: OpenCV interpolation flag, forwarded to the parent constructor.
          mask_interpolation: OpenCV interpolation flag, forwarded to the parent constructor.
          area_for_downscale: Forwarded to the parent constructor.
          p (float): Probability of applying the transform, forwarded to the parent constructor.
      """
      super().__init__(
          p=p,
          size=size,
          area_for_downscale=area_for_downscale,
          interpolation=interpolation,
          mask_interpolation=mask_interpolation,
      )
      # Only the sampling ranges are kept here; everything else is owned by the parent.
      self.scale, self.ratio = scale, ratio
  def get_params_dependent_on_data(
      self,
      params: dict[str, Any],
      data: dict[str, Any],
  ) -> dict[str, tuple[int, int, int, int]]:
      """Sample the crop rectangle for the current image.

      Up to ten candidates are drawn: the target area uniformly from `scale` (as a fraction of the
      image area) and the width/height aspect ratio log-uniformly from `ratio`. The first candidate
      that fits inside the image is placed at a random offset. If none fits, a centred crop is used
      whose aspect ratio is the image ratio clamped to `ratio`.

      Args:
          params (dict[str, Any]): The parameters of the transform; only `params["shape"]` is read.
          data (dict[str, Any]): The data of the transform (not used here).

      Returns:
          dict[str, tuple[int, int, int, int]]: A dict with the single key `"crop_coords"`, holding
              the value produced by `fcrops.get_crop_coords` or `fcrops.get_center_crop_coords`.
      """
      image_shape = params["shape"][:2]
      img_h, img_w = image_shape
      total_area = img_h * img_w

      # Sampling bounds are loop-invariant, so compute them once.
      area_lo = self.scale[0] * total_area
      area_hi = self.scale[1] * total_area
      log_ratio_lo = math.log(self.ratio[0])
      log_ratio_hi = math.log(self.ratio[1])

      for _attempt in range(10):
          sampled_area = self.py_random.uniform(area_lo, area_hi)
          sampled_ratio = math.exp(self.py_random.uniform(log_ratio_lo, log_ratio_hi))

          crop_w = round(math.sqrt(sampled_area * sampled_ratio))
          crop_h = round(math.sqrt(sampled_area / sampled_ratio))

          # Reject candidates that are empty or do not fit inside the image.
          if not (0 < crop_w <= img_w and 0 < crop_h <= img_h):
              continue

          h_start = self.py_random.random()
          w_start = self.py_random.random()
          return {"crop_coords": fcrops.get_crop_coords(image_shape, (crop_h, crop_w), h_start, w_start)}

      # No candidate fitted: fall back to a central crop with the aspect ratio clamped to `ratio`.
      in_ratio = img_w / img_h
      if in_ratio < self.ratio[0]:
          crop_w = img_w
          crop_h = round(img_w / self.ratio[0])
      elif in_ratio > self.ratio[1]:
          crop_h = img_h
          crop_w = round(img_h * self.ratio[1])
      else:
          # The image ratio is already within range, so take the whole image.
          crop_h, crop_w = img_h, img_w

      return {"crop_coords": fcrops.get_center_crop_coords(image_shape, (crop_h, crop_w))}
  class RandomCropNearBBox(BaseCrop):
      """Crop the image around a reference bounding box whose edges are randomly shifted.

      The reference box is read from the input data under `cropping_bbox_key`. Each of its four
      edges is moved by an independently sampled integer offset, the result is passed through
      `_clip_bbox` together with the image shape, and the outcome is used as the crop rectangle.

      Args:
          max_part_shift (float, (float, float)): Max shift in `height` and `width` dimensions relative
              to `cropping_bbox` dimension. The first value scales the box height, the second the
              box width.
              If max_part_shift is a single float, the range will be (0, max_part_shift).
              Default (0, 0.3).
          cropping_bbox_key (str): Additional target key for cropping box. Default `cropping_bbox`.
          p (float): probability of applying the transform. Default: 1.

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Examples:
          >>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_bbox_key='test_bbox')],
          ...               bbox_params=BboxParams("pascal_voc"))
          >>> result = aug(image=image, bboxes=bboxes, test_bbox=[0, 5, 10, 20])

      """

      _targets = ALL_TARGETS

      class InitSchema(BaseTransformInitSchema):
          max_part_shift: ZeroOneRangeType
          cropping_bbox_key: str

      def __init__(
          self,
          max_part_shift: tuple[float, float] | float = (0, 0.3),
          cropping_bbox_key: str = "cropping_bbox",
          p: float = 1.0,
      ):
          super().__init__(p=p)
          self.cropping_bbox_key = cropping_bbox_key
          # The cast only informs the type checker; the value is stored as given.
          self.max_part_shift = cast("tuple[float, float]", max_part_shift)

      def get_params_dependent_on_data(
          self,
          params: dict[str, Any],
          data: dict[str, Any],
      ) -> dict[str, tuple[float, ...]]:
          """Compute the crop rectangle from the reference box and random edge shifts.

          Args:
              params (dict[str, Any]): The parameters of the transform; `params["shape"]` is read.
              data (dict[str, Any]): The data of the transform; the reference box is read from
                  `data[self.cropping_bbox_key]`.

          Returns:
              dict[str, tuple[float, ...]]: A dict with the single key `"crop_coords"`.
          """
          raw_box = data[self.cropping_bbox_key]
          image_shape = params["shape"][:2]
          ref_box = self._clip_bbox(raw_box, image_shape)

          box_height = ref_box[3] - ref_box[1]
          box_width = ref_box[2] - ref_box[0]
          max_dy = round(box_height * self.max_part_shift[0])
          max_dx = round(box_width * self.max_part_shift[1])

          # Draw order (left, right, top, bottom) is kept fixed so seeded runs stay reproducible.
          draw = self.py_random.randint
          left = ref_box[0] - draw(-max_dx, max_dx)
          right = ref_box[2] + draw(-max_dx, max_dx)
          top = ref_box[1] - draw(-max_dy, max_dy)
          bottom = ref_box[3] + draw(-max_dy, max_dy)

          crop_coords = self._clip_bbox((left, top, right, bottom), image_shape)

          # A zero-width or zero-height result is replaced by a centred crop of the reference box size.
          collapsed = crop_coords[0] == crop_coords[2] or crop_coords[1] == crop_coords[3]
          if collapsed:
              crop_coords = fcrops.get_center_crop_coords(image_shape, (box_height, box_width))

          return {"crop_coords": crop_coords}

      @property
      def targets_as_params(self) -> list[str]:
          """Get the targets as parameters.

          Returns:
              list[str]: A one-element list containing `cropping_bbox_key`.
          """
          return [self.cropping_bbox_key]
  class BBoxSafeRandomCrop(BaseCrop):
      """Crop an area from image while ensuring all bounding boxes are preserved in the crop.

      Similar to AtLeastOneBboxRandomCrop, but with a key difference:
      - BBoxSafeRandomCrop ensures ALL bounding boxes are preserved in the crop when erosion_rate=0.0
      - AtLeastOneBboxRandomCrop ensures AT LEAST ONE bounding box is present in the crop

      This makes BBoxSafeRandomCrop more suitable for scenarios where:
      - You need to preserve all objects in the scene
      - Losing any bounding box would be problematic (e.g., rare object classes)
      - You're training a model that needs to detect multiple objects simultaneously

      The algorithm:
      1. If bounding boxes exist:
          - Computes the union of all bounding boxes
          - Applies erosion based on erosion_rate to this union
          - Clips the eroded union to valid image coordinates [0,1]
          - Randomly samples each crop edge between the image border and the matching edge
            of the clipped union, so the crop encloses the clipped union
      2. If no bounding boxes exist:
          - Computes crop height based on erosion_rate (never less than 1 pixel)
          - Sets crop width to maintain original aspect ratio (never less than 1 pixel)
          - Randomly places the crop within the image

      Args:
          erosion_rate (float): Controls how much the valid crop region can deviate from the bbox union.
              Must be in range [0.0, 1.0].
              - 0.0: crop must contain the exact bbox union (safest option that guarantees all boxes are preserved)
              - 1.0: crop can deviate maximally from the bbox union (increases likelihood of cutting off some boxes)
              Defaults to 0.0.
          p (float, optional): Probability of applying the transform. Defaults to 1.0.

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Raises:
          CropSizeError: If requested crop size exceeds image dimensions

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>>
          >>> # Prepare sample data
          >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
          >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
          >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
          >>> bbox_labels = [1, 2]
          >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
          >>> keypoint_labels = [0, 1]
          >>>
          >>> # Define transform with erosion_rate parameter
          >>> transform = A.Compose([
          ...     A.BBoxSafeRandomCrop(erosion_rate=0.2),
          ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
          ...    keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
          >>>
          >>> # Apply the transform
          >>> result = transform(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # Get the transformed data
          >>> transformed_image = result['image']  # Cropped image containing all bboxes
          >>> transformed_mask = result['mask']  # Cropped mask
          >>> transformed_bboxes = result['bboxes']  # All bounding boxes preserved with adjusted coordinates
          >>> transformed_bbox_labels = result['bbox_labels']  # Original labels preserved
          >>> transformed_keypoints = result['keypoints']  # Keypoints with adjusted coordinates
          >>> transformed_keypoint_labels = result['keypoint_labels']  # Original keypoint labels preserved
          >>>
          >>> # Example with a different erosion_rate
          >>> transform_more_flexible = A.Compose([
          ...     A.BBoxSafeRandomCrop(erosion_rate=0.5),  # More flexibility in crop placement
          ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
          >>>
          >>> # Apply transform with only image and bboxes
          >>> result_bboxes_only = transform_more_flexible(
          ...     image=image,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels
          ... )
          >>> transformed_image = result_bboxes_only['image']
          >>> transformed_bboxes = result_bboxes_only['bboxes']  # All bboxes still preserved

      Note:
          - IMPORTANT: Using erosion_rate > 0.0 may result in some bounding boxes being cut off,
            particularly narrow boxes at the boundary of the union area. For guaranteed preservation
            of all bounding boxes, use erosion_rate=0.0.
          - Aspect ratio is preserved only when no bounding boxes are present
          - May be more restrictive in crop placement compared to AtLeastOneBboxRandomCrop
          - The crop size is determined by the bounding boxes when present

      """

      _targets = ALL_TARGETS

      class InitSchema(BaseTransformInitSchema):
          erosion_rate: float = Field(
              ge=0.0,
              le=1.0,
          )

      def __init__(self, erosion_rate: float = 0.0, p: float = 1.0):
          super().__init__(p=p)
          self.erosion_rate = erosion_rate

      def _get_coords_no_bbox(self, image_shape: tuple[int, int]) -> tuple[int, int, int, int]:
          """Pick a random crop that keeps the image aspect ratio, for inputs without bboxes.

          Args:
              image_shape (tuple[int, int]): Image (height, width).

          Returns:
              tuple[int, int, int, int]: Crop coordinates as returned by `fcrops.get_crop_coords`.
          """
          image_height, image_width = image_shape

          # The schema allows erosion_rate up to 1.0, which would make the lower bound 0 and let the
          # inclusive randint() return a zero-height crop. Clamp the bound to one pixel.
          min_crop_height = max(1, int(image_height * (1.0 - self.erosion_rate)))
          crop_height = (
              image_height
              if min_crop_height >= image_height
              else self.py_random.randint(min_crop_height, image_height)
          )

          # Integer truncation can reach 0 on tall, narrow images; keep at least one pixel of width.
          crop_width = max(1, int(crop_height * image_width / image_height))

          h_start = self.py_random.random()
          w_start = self.py_random.random()

          crop_shape = (crop_height, crop_width)

          return fcrops.get_crop_coords(image_shape, crop_shape, h_start, w_start)

      def get_params_dependent_on_data(
          self,
          params: dict[str, Any],
          data: dict[str, Any],
      ) -> dict[str, tuple[int, int, int, int]]:
          """Get the parameters dependent on the data.

          Args:
              params (dict[str, Any]): The parameters of the transform; `params["shape"]` is read.
              data (dict[str, Any]): The data of the transform; `data["bboxes"]` is read.

          Returns:
              dict[str, tuple[int, int, int, int]]: A dict with the single key `"crop_coords"` holding
                  `(x_min, y_min, x_max, y_max)` in pixels.
          """
          image_shape = params["shape"][:2]

          # len() rather than truthiness: the docstring examples pass bboxes as a NumPy array, and
          # the truth value of a multi-element array is ambiguous.
          if len(data["bboxes"]) == 0:  # less likely, this class is for use with bboxes.
              crop_coords = self._get_coords_no_bbox(image_shape)
              return {"crop_coords": crop_coords}

          bbox_union = union_of_bboxes(bboxes=data["bboxes"], erosion_rate=self.erosion_rate)

          if bbox_union is None:
              crop_coords = self._get_coords_no_bbox(image_shape)
              return {"crop_coords": crop_coords}

          x_min, y_min, x_max, y_max = bbox_union

          # Keep the union inside [0, 1] and make sure max never falls below min.
          x_min = np.clip(x_min, 0, 1)
          y_min = np.clip(y_min, 0, 1)
          x_max = np.clip(x_max, x_min, 1)
          y_max = np.clip(y_max, y_min, 1)

          image_height, image_width = image_shape

          # Left/top edges land between the image border and the union's min corner.
          crop_x_min = int(x_min * self.py_random.random() * image_width)
          crop_y_min = int(y_min * self.py_random.random() * image_height)

          # Right/bottom edges land between the union's max corner and the image border.
          bbox_xmax = x_max + (1 - x_max) * self.py_random.random()
          bbox_ymax = y_max + (1 - y_max) * self.py_random.random()
          crop_x_max = int(bbox_xmax * image_width)
          crop_y_max = int(bbox_ymax * image_height)

          return {"crop_coords": (crop_x_min, crop_y_min, crop_x_max, crop_y_max)}
  class RandomSizedBBoxSafeCrop(BBoxSafeRandomCrop):
      """Crop a random part of the input and rescale it to a specific size without loss of bounding boxes.

      The crop rectangle is chosen by the parent class `BBoxSafeRandomCrop`; this class then resizes
      the cropped image and mask to `(height, width)` and rescales keypoints accordingly. This is
      useful for object detection tasks where all objects must be kept while the output size is
      standardized.

      Args:
          height (int): Height of the output image after resizing.
          width (int): Width of the output image after resizing.
          erosion_rate (float): A value between 0.0 and 1.0, forwarded to `BBoxSafeRandomCrop`.
              With bounding boxes it controls how far the crop may cut into their union; without
              bounding boxes the crop height is sampled from at least
              `(1 - erosion_rate) * image_height` up to the full height. Default: 0.0.
          interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
              cv2.INTER_NEAREST, cv2.INTER_NEAREST_EXACT, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
              cv2.INTER_LANCZOS4, cv2.INTER_LINEAR_EXACT.
              Default: cv2.INTER_LINEAR.
          mask_interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm for mask.
              Accepts the same flags as `interpolation`.
              Default: cv2.INTER_NEAREST.
          p (float): Probability of applying the transform. Default: 1.0.

      Targets:
          image, mask, bboxes, keypoints, volume, mask3d

      Image types:
          uint8, float32

      Note:
          - With erosion_rate=0.0 the crop encloses the union of all bounding boxes, so when the boxes
            are spread across the whole image the crop approaches the full image.
          - After cropping, the result is resized to the specified (height, width) size.
          - Keypoints are moved along with the crop and scaled to the new image size.
          - If there are no bounding boxes in the image, it will fall back to a random crop.

      Mathematical Details:
          1. Crop coordinates (x_min, y_min, x_max, y_max) are produced by `BBoxSafeRandomCrop`.
          2. The crop is resized to the specified (height, width) size.
          3. Keypoints are first cropped with the same coordinates and then scaled by
             scale_x = width / (x_max - x_min) and scale_y = height / (y_max - y_min).

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> import cv2
          >>>
          >>> # Prepare sample data
          >>> image = np.random.randint(0, 256, (300, 300, 3), dtype=np.uint8)
          >>> mask = np.random.randint(0, 2, (300, 300), dtype=np.uint8)
          >>>
          >>> # Create bounding boxes with some overlap and separation
          >>> bboxes = np.array([
          ...     [10, 10, 80, 80],        # top-left box
          ...     [100, 100, 200, 200],    # center box
          ...     [210, 210, 290, 290]     # bottom-right box
          ... ], dtype=np.float32)
          >>> bbox_labels = ['cat', 'dog', 'bird']
          >>>
          >>> # Create keypoints inside the bounding boxes
          >>> keypoints = np.array([
          ...     [45, 45],      # inside first box
          ...     [150, 150],    # inside second box
          ...     [250, 250]     # inside third box
          ... ], dtype=np.float32)
          >>> keypoint_labels = ['nose', 'eye', 'tail']
          >>>
          >>> # Example 1: Basic usage with default parameters
          >>> transform_basic = A.Compose([
          ...     A.RandomSizedBBoxSafeCrop(height=224, width=224, p=1.0),
          ... ], bbox_params=A.BboxParams(
          ...     format='pascal_voc',
          ...     label_fields=['bbox_labels']
          ... ), keypoint_params=A.KeypointParams(
          ...     format='xy',
          ...     label_fields=['keypoint_labels']
          ... ))
          >>>
          >>> # Apply the transform
          >>> result_basic = transform_basic(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # Access the transformed data
          >>> transformed_image = result_basic['image']  # Shape will be (224, 224, 3)
          >>> transformed_mask = result_basic['mask']  # Shape will be (224, 224)
          >>> transformed_bboxes = result_basic['bboxes']  # All original bounding boxes preserved
          >>> transformed_bbox_labels = result_basic['bbox_labels']  # Original labels preserved
          >>> transformed_keypoints = result_basic['keypoints']  # Keypoints adjusted to new coordinates
          >>> transformed_keypoint_labels = result_basic['keypoint_labels']  # Original labels preserved
          >>>
          >>> # Example 2: With erosion_rate for more flexibility in crop placement
          >>> transform_erosion = A.Compose([
          ...     A.RandomSizedBBoxSafeCrop(
          ...         height=256,
          ...         width=256,
          ...         erosion_rate=0.2,  # Allows 20% flexibility in crop placement
          ...         interpolation=cv2.INTER_CUBIC,  # Higher quality interpolation
          ...         mask_interpolation=cv2.INTER_NEAREST,  # Preserve mask edges
          ...         p=1.0
          ...     ),
          ... ], bbox_params=A.BboxParams(
          ...     format='pascal_voc',
          ...     label_fields=['bbox_labels'],
          ...     min_visibility=0.3  # Only keep bboxes with at least 30% visibility
          ... ), keypoint_params=A.KeypointParams(
          ...     format='xy',
          ...     label_fields=['keypoint_labels'],
          ...     remove_invisible=True  # Remove keypoints outside the crop
          ... ))
          >>>
          >>> # Apply the transform with erosion
          >>> result_erosion = transform_erosion(
          ...     image=image,
          ...     mask=mask,
          ...     bboxes=bboxes,
          ...     bbox_labels=bbox_labels,
          ...     keypoints=keypoints,
          ...     keypoint_labels=keypoint_labels
          ... )
          >>>
          >>> # With erosion_rate=0.2, the crop has more flexibility in placement,
          >>> # at the cost of possibly cutting into boxes at the edge of the union

      """

      _targets = ALL_TARGETS

      class InitSchema(BaseTransformInitSchema):
          height: Annotated[int, Field(ge=1)]
          width: Annotated[int, Field(ge=1)]
          erosion_rate: float = Field(
              ge=0.0,
              le=1.0,
          )
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ]

      def __init__(
          self,
          height: int,
          width: int,
          erosion_rate: float = 0.0,
          interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_LINEAR,
          mask_interpolation: Literal[
              cv2.INTER_NEAREST,
              cv2.INTER_NEAREST_EXACT,
              cv2.INTER_LINEAR,
              cv2.INTER_CUBIC,
              cv2.INTER_AREA,
              cv2.INTER_LANCZOS4,
              cv2.INTER_LINEAR_EXACT,
          ] = cv2.INTER_NEAREST,
          p: float = 1.0,
      ):
          super().__init__(erosion_rate=erosion_rate, p=p)
          # Output size of the resize step.
          self.height, self.width = height, width
          # Interpolation flags for images and masks respectively.
          self.interpolation, self.mask_interpolation = interpolation, mask_interpolation

      def apply(
          self,
          img: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Crop the image and resize the crop to `(height, width)`.

          Args:
              img (np.ndarray): The image to crop and resize.
              crop_coords (tuple[int, int, int, int]): The parameters for the crop.
              params (dict[str, Any]): Additional parameters for the transform (unused).

          Returns:
              np.ndarray: The result of resizing the crop with `self.interpolation`.
          """
          out_size = (self.height, self.width)
          cropped = fcrops.crop(img, *crop_coords)
          return fgeometric.resize(cropped, out_size, self.interpolation)

      def apply_to_mask(
          self,
          mask: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Crop the mask and resize the crop to `(height, width)`.

          Args:
              mask (np.ndarray): The mask to crop and resize.
              crop_coords (tuple[int, int, int, int]): The parameters for the crop.
              params (dict[str, Any]): Additional parameters for the transform (unused).

          Returns:
              np.ndarray: The result of resizing the crop with `self.mask_interpolation`.
          """
          out_size = (self.height, self.width)
          cropped = fcrops.crop(mask, *crop_coords)
          return fgeometric.resize(cropped, out_size, self.mask_interpolation)

      def apply_to_keypoints(
          self,
          keypoints: np.ndarray,
          crop_coords: tuple[int, int, int, int],
          **params: Any,
      ) -> np.ndarray:
          """Crop the keypoints and rescale them to the resized output.

          Args:
              keypoints (np.ndarray): The keypoints to transform.
              crop_coords (tuple[int, int, int, int]): The parameters for the crop,
                  indexed as (x_min, y_min, x_max, y_max).
              params (dict[str, Any]): Additional parameters for the transform (unused).

          Returns:
              np.ndarray: The keypoints after cropping and scaling.
          """
          cropped_points = fcrops.crop_keypoints_by_coords(keypoints, crop_coords)

          # Scale factors map the crop extent onto the requested output size.
          span_y = crop_coords[3] - crop_coords[1]
          span_x = crop_coords[2] - crop_coords[0]
          factor_y = self.height / span_y
          factor_x = self.width / span_x

          return fgeometric.keypoints_scale(cropped_points, scale_x=factor_x, scale_y=factor_y)
  2370. class CropAndPad(DualTransform):
  2371. """Crop and pad images by pixel amounts or fractions of image sizes.
  2372. This transform allows for simultaneous cropping and padding of images. Cropping removes pixels from the sides
  2373. (i.e., extracts a subimage), while padding adds pixels to the sides (e.g., black pixels). The amount of
  2374. cropping/padding can be specified either in absolute pixels or as a fraction of the image size.
  2375. Args:
  2376. px (int, tuple of int, tuple of tuples of int, or None):
  2377. The number of pixels to crop (negative values) or pad (positive values) on each side of the image.
  2378. Either this or the parameter `percent` may be set, not both at the same time.
  2379. - If int: crop/pad all sides by this value.
  2380. - If tuple of 2 ints: crop/pad by (top/bottom, left/right).
  2381. - If tuple of 4 ints: crop/pad by (top, right, bottom, left).
  2382. - Each int can also be a tuple of 2 ints for a range, or a list of ints for discrete choices.
  2383. Default: None.
  2384. percent (float, tuple of float, tuple of tuples of float, or None):
  2385. The fraction of the image size to crop (negative values) or pad (positive values) on each side.
  2386. Either this or the parameter `px` may be set, not both at the same time.
  2387. - If float: crop/pad all sides by this fraction.
  2388. - If tuple of 2 floats: crop/pad by (top/bottom, left/right) fractions.
  2389. - If tuple of 4 floats: crop/pad by (top, right, bottom, left) fractions.
  2390. - Each float can also be a tuple of 2 floats for a range, or a list of floats for discrete choices.
  2391. Default: None.
  2392. border_mode (int):
  2393. OpenCV border mode used for padding. Default: cv2.BORDER_CONSTANT.
  2394. fill (tuple[float, ...] | float):
  2395. The constant value to use for padding if border_mode is cv2.BORDER_CONSTANT.
  2396. Default: 0.
  2397. fill_mask (tuple[float, ...] | float):
  2398. Same as fill but used for mask padding. Default: 0.
  2399. keep_size (bool):
  2400. If True, the output image will be resized to the input image size after cropping/padding.
  2401. Default: True.
  2402. sample_independently (bool):
  2403. If True and ranges are used for px/percent, sample a value for each side independently.
  2404. If False, sample one value and use it for all sides. Default: True.
  2405. interpolation (int):
  2406. OpenCV interpolation flag used for resizing if keep_size is True.
  2407. Default: cv2.INTER_LINEAR.
  2408. mask_interpolation (int):
  2409. OpenCV interpolation flag used for resizing if keep_size is True.
  2410. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  2411. Default: cv2.INTER_NEAREST.
  2412. p (float):
  2413. Probability of applying the transform. Default: 1.0.
  2414. Targets:
  2415. image, mask, bboxes, keypoints, volume, mask3d
  2416. Image types:
  2417. uint8, float32
  2418. Note:
  2419. - This transform will never crop images below a height or width of 1.
  2420. - When using pixel values (px), the image will be cropped/padded by exactly that many pixels.
  2421. - When using percentages (percent), the amount of crop/pad will be calculated based on the image size.
  2422. - Bounding boxes that end up fully outside the image after cropping will be removed.
  2423. - Keypoints that end up outside the image after cropping will be removed.
  2424. Examples:
  2425. >>> import numpy as np
  2426. >>> import albumentations as A
  2427. >>> import cv2
  2428. >>>
  2429. >>> # Prepare sample data
  2430. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  2431. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  2432. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  2433. >>> bbox_labels = [1, 2]
  2434. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  2435. >>> keypoint_labels = [0, 1]
  2436. >>>
  2437. >>> # Example 1: Using px parameter with specific values for each side
  2438. >>> # Crop 10px from top, pad 20px on right, pad 30px on bottom, crop 40px from left
  2439. >>> transform_px = A.Compose([
  2440. ... A.CropAndPad(
  2441. ... px=(-10, 20, 30, -40), # (top, right, bottom, left)
  2442. ... border_mode=cv2.BORDER_CONSTANT,
  2443. ... fill=128, # Gray padding color
  2444. ... fill_mask=0,
  2445. ... keep_size=False, # Don't resize back to original dimensions
  2446. ... p=1.0
  2447. ... ),
  2448. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  2449. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  2450. >>>
  2451. >>> # Apply the transform
  2452. >>> result_px = transform_px(
  2453. ... image=image,
  2454. ... mask=mask,
  2455. ... bboxes=bboxes,
  2456. ... bbox_labels=bbox_labels,
  2457. ... keypoints=keypoints,
  2458. ... keypoint_labels=keypoint_labels
  2459. ... )
  2460. >>>
  2461. >>> # Get the transformed data with px parameters
  2462. >>> transformed_image_px = result_px['image'] # Shape will be different from original
  2463. >>> transformed_mask_px = result_px['mask']
  2464. >>> transformed_bboxes_px = result_px['bboxes'] # Adjusted to new dimensions
  2465. >>> transformed_bbox_labels_px = result_px['bbox_labels'] # Bounding box labels after crop
  2466. >>> transformed_keypoints_px = result_px['keypoints'] # Adjusted to new dimensions
  2467. >>> transformed_keypoint_labels_px = result_px['keypoint_labels'] # Keypoint labels after crop
  2468. >>>
  2469. >>> # Example 2: Using percent parameter as a single value
  2470. >>> # This will pad all sides by 10% of image dimensions
  2471. >>> transform_percent = A.Compose([
  2472. ... A.CropAndPad(
  2473. ... percent=0.1, # Pad all sides by 10%
  2474. ... border_mode=cv2.BORDER_REFLECT, # Use reflection padding
  2475. ... keep_size=True, # Resize back to original dimensions
  2476. ... p=1.0
  2477. ... ),
  2478. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  2479. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  2480. >>>
  2481. >>> # Apply the transform
  2482. >>> result_percent = transform_percent(
  2483. ... image=image,
  2484. ... mask=mask,
  2485. ... bboxes=bboxes,
  2486. ... bbox_labels=bbox_labels,
  2487. ... keypoints=keypoints,
  2488. ... keypoint_labels=keypoint_labels
  2489. ... )
  2490. >>>
  2491. >>> # Get the transformed data with percent parameters
  2492. >>> # Since keep_size=True, image dimensions remain the same (100x100)
  2493. >>> transformed_image_pct = result_percent['image']
  2494. >>> transformed_mask_pct = result_percent['mask']
  2495. >>> transformed_bboxes_pct = result_percent['bboxes']
  2496. >>> transformed_bbox_labels_pct = result_percent['bbox_labels']
  2497. >>> transformed_keypoints_pct = result_percent['keypoints']
  2498. >>> transformed_keypoint_labels_pct = result_percent['keypoint_labels']
  2499. >>>
  2500. >>> # Example 3: Random padding within a range
  2501. >>> # Pad top and bottom by 5-15%, left and right by 10-20%
  2502. >>> transform_random = A.Compose([
  2503. ... A.CropAndPad(
  2504. ... percent=[(0.05, 0.15), (0.1, 0.2), (0.05, 0.15), (0.1, 0.2)], # (top, right, bottom, left)
  2505. ... sample_independently=True, # Sample each side independently
  2506. ... border_mode=cv2.BORDER_CONSTANT,
  2507. ... fill=0, # Black padding
  2508. ... keep_size=False,
  2509. ... p=1.0
  2510. ... ),
  2511. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  2512. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  2513. >>>
  2514. >>> # Result dimensions will vary based on the random padding values chosen
  2515. """
  2516. _targets = ALL_TARGETS
  2517. class InitSchema(BaseTransformInitSchema):
  2518. px: PxType | None
  2519. percent: PercentType | None
  2520. keep_size: bool
  2521. sample_independently: bool
  2522. interpolation: Literal[
  2523. cv2.INTER_NEAREST,
  2524. cv2.INTER_NEAREST_EXACT,
  2525. cv2.INTER_LINEAR,
  2526. cv2.INTER_CUBIC,
  2527. cv2.INTER_AREA,
  2528. cv2.INTER_LANCZOS4,
  2529. cv2.INTER_LINEAR_EXACT,
  2530. ]
  2531. mask_interpolation: Literal[
  2532. cv2.INTER_NEAREST,
  2533. cv2.INTER_NEAREST_EXACT,
  2534. cv2.INTER_LINEAR,
  2535. cv2.INTER_CUBIC,
  2536. cv2.INTER_AREA,
  2537. cv2.INTER_LANCZOS4,
  2538. cv2.INTER_LINEAR_EXACT,
  2539. ]
  2540. fill: tuple[float, ...] | float
  2541. fill_mask: tuple[float, ...] | float
  2542. border_mode: Literal[
  2543. cv2.BORDER_CONSTANT,
  2544. cv2.BORDER_REPLICATE,
  2545. cv2.BORDER_REFLECT,
  2546. cv2.BORDER_WRAP,
  2547. cv2.BORDER_REFLECT_101,
  2548. ]
  2549. @model_validator(mode="after")
  2550. def _check_px_percent(self) -> Self:
  2551. if self.px is None and self.percent is None:
  2552. msg = "Both px and percent parameters cannot be None simultaneously."
  2553. raise ValueError(msg)
  2554. if self.px is not None and self.percent is not None:
  2555. msg = "Only px or percent may be set!"
  2556. raise ValueError(msg)
  2557. return self
  2558. def __init__(
  2559. self,
  2560. px: int | list[int] | None = None,
  2561. percent: float | list[float] | None = None,
  2562. keep_size: bool = True,
  2563. sample_independently: bool = True,
  2564. interpolation: Literal[
  2565. cv2.INTER_NEAREST,
  2566. cv2.INTER_NEAREST_EXACT,
  2567. cv2.INTER_LINEAR,
  2568. cv2.INTER_CUBIC,
  2569. cv2.INTER_AREA,
  2570. cv2.INTER_LANCZOS4,
  2571. cv2.INTER_LINEAR_EXACT,
  2572. ] = cv2.INTER_LINEAR,
  2573. mask_interpolation: Literal[
  2574. cv2.INTER_NEAREST,
  2575. cv2.INTER_NEAREST_EXACT,
  2576. cv2.INTER_LINEAR,
  2577. cv2.INTER_CUBIC,
  2578. cv2.INTER_AREA,
  2579. cv2.INTER_LANCZOS4,
  2580. cv2.INTER_LINEAR_EXACT,
  2581. ] = cv2.INTER_NEAREST,
  2582. border_mode: Literal[
  2583. cv2.BORDER_CONSTANT,
  2584. cv2.BORDER_REPLICATE,
  2585. cv2.BORDER_REFLECT,
  2586. cv2.BORDER_WRAP,
  2587. cv2.BORDER_REFLECT_101,
  2588. ] = cv2.BORDER_CONSTANT,
  2589. fill: tuple[float, ...] | float = 0,
  2590. fill_mask: tuple[float, ...] | float = 0,
  2591. p: float = 1.0,
  2592. ):
  2593. super().__init__(p=p)
  2594. self.px = px
  2595. self.percent = percent
  2596. self.border_mode = border_mode
  2597. self.fill = fill
  2598. self.fill_mask = fill_mask
  2599. self.keep_size = keep_size
  2600. self.sample_independently = sample_independently
  2601. self.interpolation = interpolation
  2602. self.mask_interpolation = mask_interpolation
  2603. def apply(
  2604. self,
  2605. img: np.ndarray,
  2606. crop_params: Sequence[int],
  2607. pad_params: Sequence[int],
  2608. fill: tuple[float, ...] | float,
  2609. **params: Any,
  2610. ) -> np.ndarray:
  2611. """Apply the crop and pad transform to an image.
  2612. Args:
  2613. img (np.ndarray): The image to apply the crop and pad transform to.
  2614. crop_params (Sequence[int]): The parameters for the crop.
  2615. pad_params (Sequence[int]): The parameters for the pad.
  2616. fill (tuple[float, ...] | float): The value to fill the image with.
  2617. params (dict[str, Any]): Additional parameters for the transform.
  2618. Returns:
  2619. np.ndarray: The image after the crop and pad transform.
  2620. """
  2621. return fcrops.crop_and_pad(
  2622. img,
  2623. crop_params,
  2624. pad_params,
  2625. fill,
  2626. params["shape"][:2],
  2627. self.interpolation,
  2628. self.border_mode,
  2629. self.keep_size,
  2630. )
  2631. def apply_to_mask(
  2632. self,
  2633. mask: np.ndarray,
  2634. crop_params: Sequence[int],
  2635. pad_params: Sequence[int],
  2636. fill_mask: tuple[float, ...] | float,
  2637. **params: Any,
  2638. ) -> np.ndarray:
  2639. """Apply the crop and pad transform to a mask.
  2640. Args:
  2641. mask (np.ndarray): The mask to apply the crop and pad transform to.
  2642. crop_params (Sequence[int]): The parameters for the crop.
  2643. pad_params (Sequence[int]): The parameters for the pad.
  2644. fill_mask (tuple[float, ...] | float): The value to fill the mask with.
  2645. params (dict[str, Any]): Additional parameters for the transform.
  2646. Returns:
  2647. np.ndarray: The mask after the crop and pad transform.
  2648. """
  2649. return fcrops.crop_and_pad(
  2650. mask,
  2651. crop_params,
  2652. pad_params,
  2653. fill_mask,
  2654. params["shape"][:2],
  2655. self.mask_interpolation,
  2656. self.border_mode,
  2657. self.keep_size,
  2658. )
  2659. def apply_to_bboxes(
  2660. self,
  2661. bboxes: np.ndarray,
  2662. crop_params: tuple[int, int, int, int],
  2663. pad_params: tuple[int, int, int, int],
  2664. result_shape: tuple[int, int],
  2665. **params: Any,
  2666. ) -> np.ndarray:
  2667. """Apply the crop and pad transform to bounding boxes.
  2668. Args:
  2669. bboxes (np.ndarray): The bounding boxes to apply the crop and pad transform to.
  2670. crop_params (tuple[int, int, int, int]): The parameters for the crop.
  2671. pad_params (tuple[int, int, int, int]): The parameters for the pad.
  2672. result_shape (tuple[int, int]): The shape of the result.
  2673. params (dict[str, Any]): Additional parameters for the transform.
  2674. Returns:
  2675. np.ndarray: The bounding boxes after the crop and pad transform.
  2676. """
  2677. return fcrops.crop_and_pad_bboxes(bboxes, crop_params, pad_params, params["shape"][:2], result_shape)
  2678. def apply_to_keypoints(
  2679. self,
  2680. keypoints: np.ndarray,
  2681. crop_params: tuple[int, int, int, int],
  2682. pad_params: tuple[int, int, int, int],
  2683. result_shape: tuple[int, int],
  2684. **params: Any,
  2685. ) -> np.ndarray:
  2686. """Apply the crop and pad transform to keypoints.
  2687. Args:
  2688. keypoints (np.ndarray): The keypoints to apply the crop and pad transform to.
  2689. crop_params (tuple[int, int, int, int]): The parameters for the crop.
  2690. pad_params (tuple[int, int, int, int]): The parameters for the pad.
  2691. result_shape (tuple[int, int]): The shape of the result.
  2692. params (dict[str, Any]): Additional parameters for the transform.
  2693. Returns:
  2694. np.ndarray: The keypoints after the crop and pad transform.
  2695. """
  2696. return fcrops.crop_and_pad_keypoints(
  2697. keypoints,
  2698. crop_params,
  2699. pad_params,
  2700. params["shape"][:2],
  2701. result_shape,
  2702. self.keep_size,
  2703. )
  2704. @staticmethod
  2705. def __prevent_zero(val1: int, val2: int, max_val: int) -> tuple[int, int]:
  2706. regain = abs(max_val) + 1
  2707. regain1 = regain // 2
  2708. regain2 = regain // 2
  2709. if regain1 + regain2 < regain:
  2710. regain1 += 1
  2711. if regain1 > val1:
  2712. diff = regain1 - val1
  2713. regain1 = val1
  2714. regain2 += diff
  2715. elif regain2 > val2:
  2716. diff = regain2 - val2
  2717. regain2 = val2
  2718. regain1 += diff
  2719. return val1 - regain1, val2 - regain2
  2720. @staticmethod
  2721. def _prevent_zero(crop_params: list[int], height: int, width: int) -> list[int]:
  2722. top, right, bottom, left = crop_params
  2723. remaining_height = height - (top + bottom)
  2724. remaining_width = width - (left + right)
  2725. if remaining_height < 1:
  2726. top, bottom = CropAndPad.__prevent_zero(top, bottom, height)
  2727. if remaining_width < 1:
  2728. left, right = CropAndPad.__prevent_zero(left, right, width)
  2729. return [max(top, 0), max(right, 0), max(bottom, 0), max(left, 0)]
  2730. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  2731. """Get the parameters for the crop.
  2732. Args:
  2733. params (dict[str, Any]): The parameters for the transform.
  2734. data (dict[str, Any]): The data for the transform.
  2735. Returns:
  2736. dict[str, Any]: The parameters for the crop.
  2737. """
  2738. height, width = params["shape"][:2]
  2739. if self.px is not None:
  2740. new_params = self._get_px_params()
  2741. else:
  2742. percent_params = self._get_percent_params()
  2743. new_params = [
  2744. int(percent_params[0] * height),
  2745. int(percent_params[1] * width),
  2746. int(percent_params[2] * height),
  2747. int(percent_params[3] * width),
  2748. ]
  2749. pad_params = [max(i, 0) for i in new_params]
  2750. crop_params = self._prevent_zero([-min(i, 0) for i in new_params], height, width)
  2751. top, right, bottom, left = crop_params
  2752. crop_params = [left, top, width - right, height - bottom]
  2753. result_rows = crop_params[3] - crop_params[1]
  2754. result_cols = crop_params[2] - crop_params[0]
  2755. if result_cols == width and result_rows == height:
  2756. crop_params = []
  2757. top, right, bottom, left = pad_params
  2758. pad_params = [top, bottom, left, right]
  2759. if any(pad_params):
  2760. result_rows += top + bottom
  2761. result_cols += left + right
  2762. else:
  2763. pad_params = []
  2764. return {
  2765. "crop_params": crop_params or None,
  2766. "pad_params": pad_params or None,
  2767. "fill": None if pad_params is None else self._get_pad_value(self.fill),
  2768. "fill_mask": None
  2769. if pad_params is None
  2770. else self._get_pad_value(cast("Union[tuple[float, ...], float]", self.fill_mask)),
  2771. "result_shape": (result_rows, result_cols),
  2772. }
  2773. def _get_px_params(self) -> list[int]:
  2774. if self.px is None:
  2775. msg = "px is not set"
  2776. raise ValueError(msg)
  2777. if isinstance(self.px, int):
  2778. return [self.px] * 4
  2779. if len(self.px) == PAIR:
  2780. if self.sample_independently:
  2781. return [self.py_random.randrange(*self.px) for _ in range(4)]
  2782. px = self.py_random.randrange(*self.px)
  2783. return [px] * 4
  2784. if isinstance(self.px[0], int):
  2785. return self.px
  2786. if len(self.px[0]) == PAIR:
  2787. return [self.py_random.randrange(*i) for i in self.px]
  2788. return [self.py_random.choice(i) for i in self.px]
  2789. def _get_percent_params(self) -> list[float]:
  2790. if self.percent is None:
  2791. msg = "percent is not set"
  2792. raise ValueError(msg)
  2793. if isinstance(self.percent, float):
  2794. params = [self.percent] * 4
  2795. elif len(self.percent) == PAIR:
  2796. if self.sample_independently:
  2797. params = [self.py_random.uniform(*self.percent) for _ in range(4)]
  2798. else:
  2799. px = self.py_random.uniform(*self.percent)
  2800. params = [px] * 4
  2801. elif isinstance(self.percent[0], (int, float)):
  2802. params = self.percent
  2803. elif len(self.percent[0]) == PAIR:
  2804. params = [self.py_random.uniform(*i) for i in self.percent]
  2805. else:
  2806. params = [self.py_random.choice(i) for i in self.percent]
  2807. return params # params = [top, right, bottom, left]
  2808. def _get_pad_value(
  2809. self,
  2810. fill: Sequence[float] | float,
  2811. ) -> int | float:
  2812. if isinstance(fill, (list, tuple)):
  2813. if len(fill) == PAIR:
  2814. a, b = fill
  2815. if isinstance(a, int) and isinstance(b, int):
  2816. return self.py_random.randint(a, b)
  2817. return self.py_random.uniform(a, b)
  2818. return self.py_random.choice(fill)
  2819. if isinstance(fill, (int, float)):
  2820. return fill
  2821. msg = "fill should be a number or list, or tuple of two numbers."
  2822. raise ValueError(msg)
  2823. class RandomCropFromBorders(BaseCrop):
  2824. """Randomly crops the input from its borders without resizing.
  2825. This transform randomly crops parts of the input (image, mask, bounding boxes, or keypoints)
  2826. from each of its borders. The amount of cropping is specified as a fraction of the input's
  2827. dimensions for each side independently.
  2828. Args:
  2829. crop_left (float): The maximum fraction of width to crop from the left side.
  2830. Must be in the range [0.0, 1.0]. Default: 0.1
  2831. crop_right (float): The maximum fraction of width to crop from the right side.
  2832. Must be in the range [0.0, 1.0]. Default: 0.1
  2833. crop_top (float): The maximum fraction of height to crop from the top.
  2834. Must be in the range [0.0, 1.0]. Default: 0.1
  2835. crop_bottom (float): The maximum fraction of height to crop from the bottom.
  2836. Must be in the range [0.0, 1.0]. Default: 0.1
  2837. p (float): Probability of applying the transform. Default: 1.0
  2838. Targets:
  2839. image, mask, bboxes, keypoints, volume, mask3d
  2840. Image types:
  2841. uint8, float32
  2842. Note:
  2843. - The actual amount of cropping for each side is randomly chosen between 0 and
  2844. the specified maximum for each application of the transform.
  2845. - The sum of crop_left and crop_right must not exceed 1.0, and the sum of
  2846. crop_top and crop_bottom must not exceed 1.0. Otherwise, a ValueError will be raised.
  2847. - This transform does not resize the input after cropping, so the output dimensions
  2848. will be smaller than the input dimensions.
  2849. - Bounding boxes that end up fully outside the cropped area will be removed.
  2850. - Keypoints that end up outside the cropped area will be removed.
  2851. Examples:
  2852. >>> import numpy as np
  2853. >>> import albumentations as A
  2854. >>>
  2855. >>> # Prepare sample data
  2856. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  2857. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  2858. >>> bboxes = np.array([[10, 10, 50, 50], [40, 40, 80, 80]], dtype=np.float32)
  2859. >>> bbox_labels = [1, 2]
  2860. >>> keypoints = np.array([[20, 30], [60, 70]], dtype=np.float32)
  2861. >>> keypoint_labels = [0, 1]
  2862. >>>
  2863. >>> # Define transform with crop fractions for each border
  2864. >>> transform = A.Compose([
  2865. ... A.RandomCropFromBorders(
  2866. ... crop_left=0.1, # Max 10% crop from left
  2867. ... crop_right=0.2, # Max 20% crop from right
  2868. ... crop_top=0.15, # Max 15% crop from top
  2869. ... crop_bottom=0.05, # Max 5% crop from bottom
  2870. ... p=1.0
  2871. ... ),
  2872. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  2873. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  2874. >>>
  2875. >>> # Apply transform
  2876. >>> result = transform(
  2877. ... image=image,
  2878. ... mask=mask,
  2879. ... bboxes=bboxes,
  2880. ... bbox_labels=bbox_labels,
  2881. ... keypoints=keypoints,
  2882. ... keypoint_labels=keypoint_labels
  2883. ... )
  2884. >>>
  2885. >>> # Access transformed data
  2886. >>> transformed_image = result['image'] # Reduced size image with borders cropped
  2887. >>> transformed_mask = result['mask'] # Reduced size mask with borders cropped
  2888. >>> transformed_bboxes = result['bboxes'] # Bounding boxes adjusted to new dimensions
  2889. >>> transformed_bbox_labels = result['bbox_labels'] # Bounding box labels after crop
  2890. >>> transformed_keypoints = result['keypoints'] # Keypoints adjusted to new dimensions
  2891. >>> transformed_keypoint_labels = result['keypoint_labels'] # Keypoint labels after crop
  2892. >>>
  2893. >>> # The resulting output shapes will be smaller, with dimensions reduced by
  2894. >>> # the random crop amounts from each side (within the specified maximums)
  2895. >>> print(f"Original image shape: (100, 100, 3)")
  2896. >>> print(f"Transformed image shape: {transformed_image.shape}") # e.g., (85, 75, 3)
  2897. """
  2898. _targets = ALL_TARGETS
  2899. class InitSchema(BaseTransformInitSchema):
  2900. crop_left: float = Field(
  2901. ge=0.0,
  2902. le=1.0,
  2903. )
  2904. crop_right: float = Field(
  2905. ge=0.0,
  2906. le=1.0,
  2907. )
  2908. crop_top: float = Field(
  2909. ge=0.0,
  2910. le=1.0,
  2911. )
  2912. crop_bottom: float = Field(
  2913. ge=0.0,
  2914. le=1.0,
  2915. )
  2916. @model_validator(mode="after")
  2917. def _validate_crop_values(self) -> Self:
  2918. if self.crop_left + self.crop_right > 1.0:
  2919. msg = "The sum of crop_left and crop_right must be <= 1."
  2920. raise ValueError(msg)
  2921. if self.crop_top + self.crop_bottom > 1.0:
  2922. msg = "The sum of crop_top and crop_bottom must be <= 1."
  2923. raise ValueError(msg)
  2924. return self
  2925. def __init__(
  2926. self,
  2927. crop_left: float = 0.1,
  2928. crop_right: float = 0.1,
  2929. crop_top: float = 0.1,
  2930. crop_bottom: float = 0.1,
  2931. p: float = 1.0,
  2932. ):
  2933. super().__init__(p=p)
  2934. self.crop_left = crop_left
  2935. self.crop_right = crop_right
  2936. self.crop_top = crop_top
  2937. self.crop_bottom = crop_bottom
  2938. def get_params_dependent_on_data(
  2939. self,
  2940. params: dict[str, Any],
  2941. data: dict[str, Any],
  2942. ) -> dict[str, tuple[int, int, int, int]]:
  2943. """Get the parameters for the crop.
  2944. Args:
  2945. params (dict[str, Any]): The parameters for the transform.
  2946. data (dict[str, Any]): The data for the transform.
  2947. Returns:
  2948. dict[str, tuple[int, int, int, int]]: The parameters for the crop.
  2949. """
  2950. height, width = params["shape"][:2]
  2951. x_min = self.py_random.randint(0, int(self.crop_left * width))
  2952. x_max = self.py_random.randint(max(x_min + 1, int((1 - self.crop_right) * width)), width)
  2953. y_min = self.py_random.randint(0, int(self.crop_top * height))
  2954. y_max = self.py_random.randint(max(y_min + 1, int((1 - self.crop_bottom) * height)), height)
  2955. crop_coords = x_min, y_min, x_max, y_max
  2956. return {"crop_coords": crop_coords}
  2957. class AtLeastOneBBoxRandomCrop(BaseCrop):
  2958. """Crop an area from image while ensuring at least one bounding box is present in the crop.
  2959. Similar to BBoxSafeRandomCrop, but with a key difference:
  2960. - BBoxSafeRandomCrop ensures ALL bounding boxes are preserved in the crop
  2961. - AtLeastOneBBoxRandomCrop ensures AT LEAST ONE bounding box is present in the crop
  2962. This makes AtLeastOneBBoxRandomCrop more flexible for scenarios where:
  2963. - You want to focus on individual objects rather than all objects
  2964. - You're willing to lose some bounding boxes to get more varied crops
  2965. - The image has many bounding boxes and keeping all of them would be too restrictive
  2966. The algorithm:
  2967. 1. If bounding boxes exist:
  2968. - Randomly selects a reference bounding box from available boxes
  2969. - Computes an eroded version of this box (shrunk by erosion_factor)
  2970. - Calculates valid crop bounds that ensure overlap with the eroded box
  2971. - Randomly samples crop coordinates within these bounds
  2972. 2. If no bounding boxes exist:
  2973. - Uses full image dimensions as valid bounds
  2974. - Randomly samples crop coordinates within these bounds
  2975. Args:
  2976. height (int): Fixed height of the crop
  2977. width (int): Fixed width of the crop
  2978. erosion_factor (float, optional): Factor by which to erode (shrink) the reference
  2979. bounding box when computing valid crop regions. Must be in range [0.0, 1.0].
  2980. - 0.0 means no erosion (crop must fully contain the reference box)
  2981. - 1.0 means maximum erosion (crop can be anywhere that intersects the reference box)
  2982. Defaults to 0.0.
  2983. p (float, optional): Probability of applying the transform. Defaults to 1.0.
  2984. Targets:
  2985. image, mask, bboxes, keypoints, volume, mask3d
  2986. Image types:
  2987. uint8, float32
  2988. Raises:
  2989. CropSizeError: If requested crop size exceeds image dimensions
  2990. Examples:
  2991. >>> import numpy as np
  2992. >>> import albumentations as A
  2993. >>> import cv2
  2994. >>>
  2995. >>> # Prepare sample data
  2996. >>> image = np.random.randint(0, 256, (300, 300, 3), dtype=np.uint8)
  2997. >>> mask = np.random.randint(0, 2, (300, 300), dtype=np.uint8)
  2998. >>> # Create multiple bounding boxes - the transform will ensure at least one is in the crop
  2999. >>> bboxes = np.array([
  3000. ... [30, 50, 100, 140], # first box
  3001. ... [150, 120, 270, 250], # second box
  3002. ... [200, 30, 280, 90] # third box
  3003. ... ], dtype=np.float32)
  3004. >>> bbox_labels = [1, 2, 3]
  3005. >>> keypoints = np.array([
  3006. ... [50, 70], # keypoint inside first box
  3007. ... [190, 170], # keypoint inside second box
  3008. ... [240, 60] # keypoint inside third box
  3009. ... ], dtype=np.float32)
  3010. >>> keypoint_labels = [0, 1, 2]
  3011. >>>
  3012. >>> # Define transform with different erosion_factor values
  3013. >>> transform = A.Compose([
  3014. ... A.AtLeastOneBBoxRandomCrop(
  3015. ... height=200,
  3016. ... width=200,
  3017. ... erosion_factor=0.2, # Allows moderate flexibility in crop placement
  3018. ... p=1.0
  3019. ... ),
  3020. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']),
  3021. ... keypoint_params=A.KeypointParams(format='xy', label_fields=['keypoint_labels']))
  3022. >>>
  3023. >>> # Apply the transform
  3024. >>> transformed = transform(
  3025. ... image=image,
  3026. ... mask=mask,
  3027. ... bboxes=bboxes,
  3028. ... bbox_labels=bbox_labels,
  3029. ... keypoints=keypoints,
  3030. ... keypoint_labels=keypoint_labels
  3031. ... )
  3032. >>>
  3033. >>> # Get the transformed data
  3034. >>> transformed_image = transformed['image'] # Shape: (200, 200, 3)
  3035. >>> transformed_mask = transformed['mask'] # Shape: (200, 200)
  3036. >>> transformed_bboxes = transformed['bboxes'] # At least one bbox is guaranteed
  3037. >>> transformed_bbox_labels = transformed['bbox_labels'] # Labels for the preserved bboxes
  3038. >>> transformed_keypoints = transformed['keypoints'] # Only keypoints in crop are kept
  3039. >>> transformed_keypoint_labels = transformed['keypoint_labels'] # Their labels
  3040. >>>
  3041. >>> # Verify that at least one bounding box was preserved
  3042. >>> assert len(transformed_bboxes) > 0, "Should have at least one bbox in the crop"
  3043. >>>
  3044. >>> # With erosion_factor=0.0, the crop must fully contain the selected reference bbox
  3045. >>> conservative_transform = A.Compose([
  3046. ... A.AtLeastOneBBoxRandomCrop(
  3047. ... height=200,
  3048. ... width=200,
  3049. ... erosion_factor=0.0, # No erosion - crop must fully contain a bbox
  3050. ... p=1.0
  3051. ... ),
  3052. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
  3053. >>>
  3054. >>> # With erosion_factor=1.0, the crop must only intersect with the selected reference bbox
  3055. >>> flexible_transform = A.Compose([
  3056. ... A.AtLeastOneBBoxRandomCrop(
  3057. ... height=200,
  3058. ... width=200,
  3059. ... erosion_factor=1.0, # Maximum erosion - crop only needs to intersect a bbox
  3060. ... p=1.0
  3061. ... ),
  3062. ... ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_labels']))
  3063. Note:
  3064. - Uses fixed crop dimensions (height and width)
  3065. - Bounding boxes that end up partially outside the crop will be adjusted
  3066. - Bounding boxes that end up completely outside the crop will be removed
  3067. - If no bounding boxes are provided, acts as a regular random crop
  3068. """
  3069. _targets = ALL_TARGETS
  3070. class InitSchema(BaseCrop.InitSchema):
  3071. height: Annotated[int, Field(ge=1)]
  3072. width: Annotated[int, Field(ge=1)]
  3073. erosion_factor: Annotated[float, Field(ge=0.0, le=1.0)]
  3074. def __init__(
  3075. self,
  3076. height: int,
  3077. width: int,
  3078. erosion_factor: float = 0.0,
  3079. p: float = 1.0,
  3080. ):
  3081. super().__init__(p=p)
  3082. self.height = height
  3083. self.width = width
  3084. self.erosion_factor = erosion_factor
  3085. def get_params_dependent_on_data(
  3086. self,
  3087. params: dict[str, Any],
  3088. data: dict[str, Any],
  3089. ) -> dict[str, tuple[int, int, int, int]]:
  3090. """Get the parameters for the crop.
  3091. Args:
  3092. params (dict[str, Any]): The parameters for the transform.
  3093. data (dict[str, Any]): The data for the transform.
  3094. """
  3095. image_height, image_width = params["shape"][:2]
  3096. bboxes = data.get("bboxes", [])
  3097. if self.height > image_height or self.width > image_width:
  3098. raise CropSizeError(
  3099. f"Crop size (height, width) exceeds image dimensions (height, width):"
  3100. f" {(self.height, self.width)} vs {image_height, image_width}",
  3101. )
  3102. if len(bboxes) > 0:
  3103. bboxes = denormalize_bboxes(bboxes, shape=(image_height, image_width))
  3104. # Pick a bbox amongst all possible as our reference bbox.
  3105. reference_bbox = self.py_random.choice(bboxes)
  3106. bbox_x1, bbox_y1, bbox_x2, bbox_y2 = reference_bbox[:4]
  3107. # Compute valid crop bounds:
  3108. # erosion_factor = 0.0: crop must fully contain the bbox
  3109. # erosion_factor = 1.0: crop can be anywhere that intersects the bbox
  3110. if self.erosion_factor < 1.0:
  3111. # Regular case: compute eroded box dimensions
  3112. bbox_width = bbox_x2 - bbox_x1
  3113. bbox_height = bbox_y2 - bbox_y1
  3114. eroded_width = bbox_width * (1.0 - self.erosion_factor)
  3115. eroded_height = bbox_height * (1.0 - self.erosion_factor)
  3116. min_crop_x = np.clip(
  3117. a=bbox_x1 + eroded_width - self.width,
  3118. a_min=0.0,
  3119. a_max=image_width - self.width,
  3120. )
  3121. max_crop_x = np.clip(
  3122. a=bbox_x2 - eroded_width,
  3123. a_min=0.0,
  3124. a_max=image_width - self.width,
  3125. )
  3126. min_crop_y = np.clip(
  3127. a=bbox_y1 + eroded_height - self.height,
  3128. a_min=0.0,
  3129. a_max=image_height - self.height,
  3130. )
  3131. max_crop_y = np.clip(
  3132. a=bbox_y2 - eroded_height,
  3133. a_min=0.0,
  3134. a_max=image_height - self.height,
  3135. )
  3136. else:
  3137. # Maximum erosion case: crop can be anywhere that intersects the bbox
  3138. min_crop_x = np.clip(
  3139. a=bbox_x1 - self.width, # leftmost position that still intersects
  3140. a_min=0.0,
  3141. a_max=image_width - self.width,
  3142. )
  3143. max_crop_x = np.clip(
  3144. a=bbox_x2, # rightmost position that still intersects
  3145. a_min=0.0,
  3146. a_max=image_width - self.width,
  3147. )
  3148. min_crop_y = np.clip(
  3149. a=bbox_y1 - self.height, # topmost position that still intersects
  3150. a_min=0.0,
  3151. a_max=image_height - self.height,
  3152. )
  3153. max_crop_y = np.clip(
  3154. a=bbox_y2, # bottommost position that still intersects
  3155. a_min=0.0,
  3156. a_max=image_height - self.height,
  3157. )
  3158. else:
  3159. # If there are no bboxes, just crop anywhere in the image.
  3160. min_crop_x = 0.0
  3161. max_crop_x = image_width - self.width
  3162. min_crop_y = 0.0
  3163. max_crop_y = image_height - self.height
  3164. # Randomly draw the upper-left corner of the crop.
  3165. crop_x1 = int(self.py_random.uniform(a=min_crop_x, b=max_crop_x))
  3166. crop_y1 = int(self.py_random.uniform(a=min_crop_y, b=max_crop_y))
  3167. crop_x2 = crop_x1 + self.width
  3168. crop_y2 = crop_y1 + self.height
  3169. return {"crop_coords": (crop_x1, crop_y1, crop_x2, crop_y2)}