transforms.py 277 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
7727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340
  1. """Pixel-level transformations for image augmentation.
  2. This module contains transforms that modify pixel values without changing the geometry of the image.
  3. Includes transforms for adjusting color, brightness, contrast, adding noise, simulating weather effects,
  4. and other pixel-level manipulations.
  5. """
  6. from __future__ import annotations
  7. import math
  8. import numbers
  9. import warnings
  10. from collections.abc import Sequence
  11. from typing import Annotated, Any, Callable, Union, cast
  12. import albucore
  13. import cv2
  14. import numpy as np
  15. from albucore import (
  16. MAX_VALUES_BY_DTYPE,
  17. NUM_MULTI_CHANNEL_DIMENSIONS,
  18. batch_transform,
  19. get_num_channels,
  20. is_grayscale_image,
  21. is_rgb_image,
  22. multiply,
  23. normalize,
  24. normalize_per_image,
  25. )
  26. from pydantic import (
  27. AfterValidator,
  28. BaseModel,
  29. ConfigDict,
  30. Field,
  31. ValidationInfo,
  32. field_validator,
  33. model_validator,
  34. )
  35. from scipy import special
  36. from typing_extensions import Literal, Self
  37. import albumentations.augmentations.geometric.functional as fgeometric
  38. from albumentations.augmentations.blur import functional as fblur
  39. from albumentations.augmentations.blur.transforms import BlurInitSchema
  40. from albumentations.augmentations.pixel import functional as fpixel
  41. from albumentations.augmentations.utils import check_range, non_rgb_error
  42. from albumentations.core.pydantic import (
  43. NonNegativeFloatRangeType,
  44. OnePlusFloatRangeType,
  45. OnePlusIntRangeType,
  46. SymmetricRangeType,
  47. ZeroOneRangeType,
  48. check_range_bounds,
  49. nondecreasing,
  50. )
  51. from albumentations.core.transforms_interface import (
  52. BaseTransformInitSchema,
  53. ImageOnlyTransform,
  54. )
  55. from albumentations.core.type_definitions import (
  56. MAX_RAIN_ANGLE,
  57. NUM_RGB_CHANNELS,
  58. PAIR,
  59. SEVEN,
  60. )
  61. from albumentations.core.utils import to_tuple
# Public API of this module: the names exported by a star-import of the module.
# Entries are kept in sorted order (all-caps names first, then CamelCase).
# NOTE(review): each entry presumably names a transform class defined further
# down in this file; only `Normalize` is visible from this view, so confirm.
__all__ = [
    "CLAHE",
    "AdditiveNoise",
    "AutoContrast",
    "ChannelShuffle",
    "ChromaticAberration",
    "ColorJitter",
    "Downscale",
    "Emboss",
    "Equalize",
    "FancyPCA",
    "GaussNoise",
    "HEStain",
    "HueSaturationValue",
    "ISONoise",
    "Illumination",
    "ImageCompression",
    "InvertImg",
    "MultiplicativeNoise",
    "Normalize",
    "PlanckianJitter",
    "PlasmaBrightnessContrast",
    "PlasmaShadow",
    "Posterize",
    "RGBShift",
    "RandomBrightnessContrast",
    "RandomFog",
    "RandomGamma",
    "RandomGravel",
    "RandomRain",
    "RandomShadow",
    "RandomSnow",
    "RandomSunFlare",
    "RandomToneCurve",
    "RingingOvershoot",
    "SaltAndPepper",
    "Sharpen",
    "ShotNoise",
    "Solarize",
    "Spatter",
    "Superpixels",
    "ToGray",
    "ToRGB",
    "ToSepia",
    "UnsharpMask",
]
# Module-level numeric constants; their use sites are outside this view.
# NOTE(review): presumably the expected length of a per-channel `num_bits`
# sequence (one entry per RGB channel) -- confirm against the code that reads it.
NUM_BITS_ARRAY_LENGTH = 3
# NOTE(review): named stand-in for the literal 20; what the threshold means is
# not visible from here -- confirm at the use site before renaming or changing.
TWENTY = 20
  110. class Normalize(ImageOnlyTransform):
  111. """Applies various normalization techniques to an image. The specific normalization technique can be selected
  112. with the `normalization` parameter.
  113. Standard normalization is applied using the formula:
  114. `img = (img - mean * max_pixel_value) / (std * max_pixel_value)`.
  115. Other normalization techniques adjust the image based on global or per-channel statistics,
  116. or scale pixel values to a specified range.
  117. Args:
  118. mean (tuple[float, float] | float | None): Mean values for standard normalization.
  119. For "standard" normalization, the default values are ImageNet mean values: (0.485, 0.456, 0.406).
  120. std (tuple[float, float] | float | None): Standard deviation values for standard normalization.
  121. For "standard" normalization, the default values are ImageNet standard deviation :(0.229, 0.224, 0.225).
  122. max_pixel_value (float | None): Maximum possible pixel value, used for scaling in standard normalization.
  123. Defaults to 255.0.
  124. normalization (Literal["standard", "image", "image_per_channel", "min_max", "min_max_per_channel"]):
  125. Specifies the normalization technique to apply. Defaults to "standard".
  126. - "standard": Applies the formula `(img - mean * max_pixel_value) / (std * max_pixel_value)`.
  127. The default mean and std are based on ImageNet. You can use mean and std values of (0.5, 0.5, 0.5)
  128. for inception normalization. And mean values of (0, 0, 0) and std values of (1, 1, 1) for YOLO.
  129. - "image": Normalizes the whole image based on its global mean and standard deviation.
  130. - "image_per_channel": Normalizes the image per channel based on each channel's mean and standard deviation.
  131. - "min_max": Scales the image pixel values to a [0, 1] range based on the global
  132. minimum and maximum pixel values.
  133. - "min_max_per_channel": Scales each channel of the image pixel values to a [0, 1]
  134. range based on the per-channel minimum and maximum pixel values.
  135. p (float): Probability of applying the transform. Defaults to 1.0.
  136. Targets:
  137. image
  138. Image types:
  139. uint8, float32
  140. Note:
  141. - For "standard" normalization, `mean`, `std`, and `max_pixel_value` must be provided.
  142. - For other normalization types, these parameters are ignored.
  143. - For inception normalization, use mean values of (0.5, 0.5, 0.5).
  144. - For YOLO normalization, use mean values of (0, 0, 0) and std values of (1, 1, 1).
  145. - This transform is often used as a final step in image preprocessing pipelines to
  146. prepare images for neural network input.
  147. Examples:
  148. >>> import numpy as np
  149. >>> import albumentations as A
  150. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  151. >>> # Standard ImageNet normalization
  152. >>> transform = A.Normalize(
  153. ... mean=(0.485, 0.456, 0.406),
  154. ... std=(0.229, 0.224, 0.225),
  155. ... max_pixel_value=255.0,
  156. ... p=1.0
  157. ... )
  158. >>> normalized_image = transform(image=image)["image"]
  159. >>>
  160. >>> # Min-max normalization
  161. >>> transform_minmax = A.Normalize(normalization="min_max", p=1.0)
  162. >>> normalized_image_minmax = transform_minmax(image=image)["image"]
  163. References:
  164. - ImageNet mean and std: https://pytorch.org/vision/stable/models.html
  165. - Inception preprocessing: https://keras.io/api/applications/inceptionv3/
  166. """
  167. class InitSchema(BaseTransformInitSchema):
  168. mean: tuple[float, ...] | float | None
  169. std: tuple[float, ...] | float | None
  170. max_pixel_value: float | None
  171. normalization: Literal[
  172. "standard",
  173. "image",
  174. "image_per_channel",
  175. "min_max",
  176. "min_max_per_channel",
  177. ]
  178. @model_validator(mode="after")
  179. def _validate_normalization(self) -> Self:
  180. if (
  181. self.mean is None
  182. or self.std is None
  183. or (self.max_pixel_value is None and self.normalization == "standard")
  184. ):
  185. raise ValueError(
  186. "mean, std, and max_pixel_value must be provided for standard normalization.",
  187. )
  188. return self
  189. def __init__(
  190. self,
  191. mean: tuple[float, ...] | float | None = (0.485, 0.456, 0.406),
  192. std: tuple[float, ...] | float | None = (0.229, 0.224, 0.225),
  193. max_pixel_value: float | None = 255.0,
  194. normalization: Literal[
  195. "standard",
  196. "image",
  197. "image_per_channel",
  198. "min_max",
  199. "min_max_per_channel",
  200. ] = "standard",
  201. p: float = 1.0,
  202. ):
  203. super().__init__(p=p)
  204. self.mean = mean
  205. self.mean_np = np.array(mean, dtype=np.float32) * max_pixel_value
  206. self.std = std
  207. self.denominator = np.reciprocal(
  208. np.array(std, dtype=np.float32) * max_pixel_value,
  209. )
  210. self.max_pixel_value = max_pixel_value
  211. self.normalization = normalization
  212. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  213. """Apply normalization to the input image.
  214. Args:
  215. img (np.ndarray): The input image to normalize.
  216. **params (Any): Additional parameters (not used in this transform).
  217. Returns:
  218. np.ndarray: The normalized image.
  219. """
  220. if self.normalization == "standard":
  221. return normalize(
  222. img,
  223. self.mean_np,
  224. self.denominator,
  225. )
  226. return normalize_per_image(img, self.normalization)
  227. @batch_transform("channel", has_batch_dim=True, has_depth_dim=False)
  228. def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
  229. """Apply normalization to a batch of images.
  230. Args:
  231. images (np.ndarray): Batch of images to normalize with shape (batch, height, width, channels).
  232. **params (Any): Additional parameters.
  233. Returns:
  234. np.ndarray: Normalized batch of images.
  235. """
  236. return self.apply(images, **params)
  237. @batch_transform("channel", has_batch_dim=False, has_depth_dim=True)
  238. def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
  239. """Apply normalization to a 3D volume.
  240. Args:
  241. volume (np.ndarray): 3D volume to normalize with shape (depth, height, width, channels).
  242. **params (Any): Additional parameters.
  243. Returns:
  244. np.ndarray: Normalized 3D volume.
  245. """
  246. return self.apply(volume, **params)
  247. @batch_transform("channel", has_batch_dim=True, has_depth_dim=True)
  248. def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
  249. """Apply normalization to a batch of 3D volumes.
  250. Args:
  251. volumes (np.ndarray): Batch of 3D volumes to normalize with shape (batch, depth, height, width, channels).
  252. **params (Any): Additional parameters.
  253. Returns:
  254. np.ndarray: Normalized batch of 3D volumes.
  255. """
  256. return self.apply(volumes, **params)
  257. class ImageCompression(ImageOnlyTransform):
  258. """Decrease image quality by applying JPEG or WebP compression.
  259. This transform simulates the effect of saving an image with lower quality settings,
  260. which can introduce compression artifacts. It's useful for data augmentation and
  261. for testing model robustness against varying image qualities.
  262. Args:
  263. quality_range (tuple[int, int]): Range for the compression quality.
  264. The values should be in [1, 100] range, where:
  265. - 1 is the lowest quality (maximum compression)
  266. - 100 is the highest quality (minimum compression)
  267. Default: (99, 100)
  268. compression_type (Literal["jpeg", "webp"]): Type of compression to apply.
  269. - "jpeg": JPEG compression
  270. - "webp": WebP compression
  271. Default: "jpeg"
  272. p (float): Probability of applying the transform. Default: 0.5.
  273. Targets:
  274. image
  275. Image types:
  276. uint8, float32
  277. Number of channels:
  278. Any
  279. Note:
  280. - This transform expects images with 1, 3, or 4 channels.
  281. - For JPEG compression, alpha channels (4th channel) will be ignored.
  282. - WebP compression supports transparency (4 channels).
  283. - The actual file is not saved to disk; the compression is simulated in memory.
  284. - Lower quality values result in smaller file sizes but may introduce visible artifacts.
  285. - This transform can be useful for:
  286. * Data augmentation to improve model robustness
  287. * Testing how models perform on images of varying quality
  288. * Simulating images transmitted over low-bandwidth connections
  289. Examples:
  290. >>> import numpy as np
  291. >>> import albumentations as A
  292. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  293. >>> transform = A.ImageCompression(quality_range=(50, 90), compression_type=0, p=1.0)
  294. >>> result = transform(image=image)
  295. >>> compressed_image = result["image"]
  296. References:
  297. - JPEG compression: https://en.wikipedia.org/wiki/JPEG
  298. - WebP compression: https://developers.google.com/speed/webp
  299. """
  300. class InitSchema(BaseTransformInitSchema):
  301. quality_range: Annotated[
  302. tuple[int, int],
  303. AfterValidator(check_range_bounds(1, 100)),
  304. AfterValidator(nondecreasing),
  305. ]
  306. compression_type: Literal["jpeg", "webp"]
  307. def __init__(
  308. self,
  309. compression_type: Literal["jpeg", "webp"] = "jpeg",
  310. quality_range: tuple[int, int] = (99, 100),
  311. p: float = 0.5,
  312. ):
  313. super().__init__(p=p)
  314. self.quality_range = quality_range
  315. self.compression_type = compression_type
  316. def apply(
  317. self,
  318. img: np.ndarray,
  319. quality: int,
  320. image_type: Literal[".jpg", ".webp"],
  321. **params: Any,
  322. ) -> np.ndarray:
  323. """Apply compression to the input image.
  324. Args:
  325. img (np.ndarray): The input image to be compressed.
  326. quality (int): Compression quality level (1-100).
  327. image_type (Literal[".jpg", ".webp"]): File extension indicating compression format.
  328. **params (Any): Additional parameters (not used in this transform).
  329. Returns:
  330. np.ndarray: The compressed image.
  331. """
  332. return fpixel.image_compression(img, quality, image_type)
  333. def get_params(self) -> dict[str, int | str]:
  334. """Generate random parameters for the transform.
  335. Returns:
  336. dict[str, int | str]: Dictionary with the following keys:
  337. - "quality" (int): Random quality value within the specified range.
  338. - "image_type" (str): File extension for the chosen compression type.
  339. """
  340. image_type = ".jpg" if self.compression_type == "jpeg" else ".webp"
  341. return {
  342. "quality": self.py_random.randint(*self.quality_range),
  343. "image_type": image_type,
  344. }
  345. class RandomSnow(ImageOnlyTransform):
  346. """Applies a random snow effect to the input image.
  347. This transform simulates snowfall by either bleaching out some pixel values or
  348. adding a snow texture to the image, depending on the chosen method.
  349. Args:
  350. snow_point_range (tuple[float, float]): Range for the snow point threshold.
  351. Both values should be in the (0, 1) range. Default: (0.1, 0.3).
  352. brightness_coeff (float): Coefficient applied to increase the brightness of pixels
  353. below the snow_point threshold. Larger values lead to more pronounced snow effects.
  354. Should be > 0. Default: 2.5.
  355. method (Literal["bleach", "texture"]): The snow simulation method to use. Options are:
  356. - "bleach": Uses a simple pixel value thresholding technique.
  357. - "texture": Applies a more realistic snow texture overlay.
  358. Default: "texture".
  359. p (float): Probability of applying the transform. Default: 0.5.
  360. Targets:
  361. image
  362. Image types:
  363. uint8, float32
  364. Note:
  365. - The "bleach" method increases the brightness of pixels above a certain threshold,
  366. creating a simple snow effect. This method is faster but may look less realistic.
  367. - The "texture" method creates a more realistic snow effect through the following steps:
  368. 1. Converts the image to HSV color space for better control over brightness.
  369. 2. Increases overall image brightness to simulate the reflective nature of snow.
  370. 3. Generates a snow texture using Gaussian noise, which is then smoothed with a Gaussian filter.
  371. 4. Applies a depth effect to the snow texture, making it more prominent at the top of the image.
  372. 5. Blends the snow texture with the original image using alpha compositing.
  373. 6. Adds a slight blue tint to simulate the cool color of snow.
  374. 7. Adds random sparkle effects to simulate light reflecting off snow crystals.
  375. This method produces a more realistic result but is computationally more expensive.
  376. Mathematical Formulation:
  377. For the "bleach" method:
  378. Let L be the lightness channel in HLS color space.
  379. For each pixel (i, j):
  380. If L[i, j] > snow_point:
  381. L[i, j] = L[i, j] * brightness_coeff
  382. For the "texture" method:
  383. 1. Brightness adjustment: V_new = V * (1 + brightness_coeff * snow_point)
  384. 2. Snow texture generation: T = GaussianFilter(GaussianNoise(μ=0.5, sigma=0.3))
  385. 3. Depth effect: D = LinearGradient(1.0 to 0.2)
  386. 4. Final pixel value: P = (1 - alpha) * original_pixel + alpha * (T * D * 255)
  387. where alpha is the snow intensity factor derived from snow_point.
  388. Examples:
  389. >>> import numpy as np
  390. >>> import albumentations as A
  391. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  392. # Default usage (bleach method)
  393. >>> transform = A.RandomSnow(p=1.0)
  394. >>> snowy_image = transform(image=image)["image"]
  395. # Using texture method with custom parameters
  396. >>> transform = A.RandomSnow(
  397. ... snow_point_range=(0.2, 0.4),
  398. ... brightness_coeff=2.0,
  399. ... method="texture",
  400. ... p=1.0
  401. ... )
  402. >>> snowy_image = transform(image=image)["image"]
  403. References:
  404. - Bleach method: https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
  405. - Texture method: Inspired by computer graphics techniques for snow rendering
  406. and atmospheric scattering simulations.
  407. """
  408. class InitSchema(BaseTransformInitSchema):
  409. snow_point_range: Annotated[
  410. tuple[float, float],
  411. AfterValidator(check_range_bounds(0, 1)),
  412. AfterValidator(nondecreasing),
  413. ]
  414. brightness_coeff: float = Field(gt=0)
  415. method: Literal["bleach", "texture"]
  416. def __init__(
  417. self,
  418. brightness_coeff: float = 2.5,
  419. snow_point_range: tuple[float, float] = (0.1, 0.3),
  420. method: Literal["bleach", "texture"] = "bleach",
  421. p: float = 0.5,
  422. ):
  423. super().__init__(p=p)
  424. self.snow_point_range = snow_point_range
  425. self.brightness_coeff = brightness_coeff
  426. self.method = method
  427. def apply(
  428. self,
  429. img: np.ndarray,
  430. snow_point: float,
  431. snow_texture: np.ndarray,
  432. sparkle_mask: np.ndarray,
  433. **params: Any,
  434. ) -> np.ndarray:
  435. """Apply the snow effect to the input image.
  436. Args:
  437. img (np.ndarray): The input image to apply the snow effect to.
  438. snow_point (float): The snow point threshold.
  439. snow_texture (np.ndarray): The snow texture overlay.
  440. sparkle_mask (np.ndarray): The sparkle mask for the snow effect.
  441. **params (Any): Additional parameters (not used in this transform).
  442. Returns:
  443. np.ndarray: The image with the applied snow effect.
  444. """
  445. non_rgb_error(img)
  446. if self.method == "bleach":
  447. return fpixel.add_snow_bleach(img, snow_point, self.brightness_coeff)
  448. if self.method == "texture":
  449. return fpixel.add_snow_texture(
  450. img,
  451. snow_point,
  452. self.brightness_coeff,
  453. snow_texture,
  454. sparkle_mask,
  455. )
  456. raise ValueError(f"Unknown snow method: {self.method}")
  457. def get_params_dependent_on_data(
  458. self,
  459. params: dict[str, Any],
  460. data: dict[str, Any],
  461. ) -> dict[str, np.ndarray | None]:
  462. """Generate parameters dependent on the input data.
  463. Args:
  464. params (dict[str, Any]): Parameters from the previous transform.
  465. data (dict[str, Any]): Input data.
  466. Returns:
  467. dict[str, np.ndarray | None]: Dictionary with the following keys:
  468. - "snow_point" (np.ndarray | None): The snow point threshold.
  469. - "snow_texture" (np.ndarray | None): The snow texture overlay.
  470. - "sparkle_mask" (np.ndarray | None): The sparkle mask for the snow effect.
  471. """
  472. image_shape = params["shape"][:2]
  473. result = {
  474. "snow_point": self.py_random.uniform(*self.snow_point_range),
  475. "snow_texture": None,
  476. "sparkle_mask": None,
  477. }
  478. if self.method == "texture":
  479. snow_texture, sparkle_mask = fpixel.generate_snow_textures(
  480. img_shape=image_shape,
  481. random_generator=self.random_generator,
  482. )
  483. result["snow_texture"] = snow_texture
  484. result["sparkle_mask"] = sparkle_mask
  485. return result
  486. class RandomGravel(ImageOnlyTransform):
  487. """Adds gravel-like artifacts to the input image.
  488. This transform simulates the appearance of gravel or small stones scattered across
  489. specific regions of an image. It's particularly useful for augmenting datasets of
  490. road or terrain images, adding realistic texture variations.
  491. Args:
  492. gravel_roi (tuple[float, float, float, float]): Region of interest where gravel
  493. will be added, specified as (x_min, y_min, x_max, y_max) in relative coordinates
  494. [0, 1]. Default: (0.1, 0.4, 0.9, 0.9).
  495. number_of_patches (int): Number of gravel patch regions to generate within the ROI.
  496. Each patch will contain multiple gravel particles. Default: 2.
  497. p (float): Probability of applying the transform. Default: 0.5.
  498. Targets:
  499. image
  500. Image types:
  501. uint8, float32
  502. Number of channels:
  503. 3
  504. Note:
  505. - The gravel effect is created by modifying the saturation channel in the HLS color space.
  506. - Gravel particles are distributed within randomly generated patches inside the specified ROI.
  507. - This transform is particularly useful for:
  508. * Augmenting datasets for road condition analysis
  509. * Simulating variations in terrain for computer vision tasks
  510. * Adding realistic texture to synthetic images of outdoor scenes
  511. Mathematical Formulation:
  512. For each gravel patch:
  513. 1. A rectangular region is randomly generated within the specified ROI.
  514. 2. Within this region, multiple gravel particles are placed.
  515. 3. For each particle:
  516. - Random (x, y) coordinates are generated within the patch.
  517. - A random radius (r) between 1 and 3 pixels is assigned.
  518. - A random saturation value (sat) between 0 and 255 is assigned.
  519. 4. The saturation channel of the image is modified for each particle:
  520. image_hls[y-r:y+r, x-r:x+r, 1] = sat
  521. Examples:
  522. >>> import numpy as np
  523. >>> import albumentations as A
  524. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  525. # Default usage
  526. >>> transform = A.RandomGravel(p=1.0)
  527. >>> augmented_image = transform(image=image)["image"]
  528. # Custom ROI and number of patches
  529. >>> transform = A.RandomGravel(
  530. ... gravel_roi=(0.2, 0.2, 0.8, 0.8),
  531. ... number_of_patches=5,
  532. ... p=1.0
  533. ... )
  534. >>> augmented_image = transform(image=image)["image"]
  535. # Combining with other transforms
  536. >>> transform = A.Compose([
  537. ... A.RandomGravel(p=0.7),
  538. ... A.RandomBrightnessContrast(p=0.5),
  539. ... ])
  540. >>> augmented_image = transform(image=image)["image"]
  541. References:
  542. - Road surface textures: https://en.wikipedia.org/wiki/Road_surface
  543. - HLS color space: https://en.wikipedia.org/wiki/HSL_and_HSV
  544. """
  545. class InitSchema(BaseTransformInitSchema):
  546. gravel_roi: tuple[float, float, float, float]
  547. number_of_patches: int = Field(ge=1)
  548. @model_validator(mode="after")
  549. def _validate_gravel_roi(self) -> Self:
  550. gravel_lower_x, gravel_lower_y, gravel_upper_x, gravel_upper_y = self.gravel_roi
  551. if not 0 <= gravel_lower_x < gravel_upper_x <= 1 or not 0 <= gravel_lower_y < gravel_upper_y <= 1:
  552. raise ValueError(f"Invalid gravel_roi. Got: {self.gravel_roi}.")
  553. return self
  554. def __init__(
  555. self,
  556. gravel_roi: tuple[float, float, float, float] = (0.1, 0.4, 0.9, 0.9),
  557. number_of_patches: int = 2,
  558. p: float = 0.5,
  559. ):
  560. super().__init__(p=p)
  561. self.gravel_roi = gravel_roi
  562. self.number_of_patches = number_of_patches
  563. def generate_gravel_patch(
  564. self,
  565. rectangular_roi: tuple[int, int, int, int],
  566. ) -> np.ndarray:
  567. """Generate gravel particles within a specified rectangular region.
  568. Args:
  569. rectangular_roi (tuple[int, int, int, int]): The rectangular region where gravel
  570. particles will be generated, specified as (x_min, y_min, x_max, y_max) in pixel coordinates.
  571. Returns:
  572. np.ndarray: An array of gravel particles with shape (count, 2), where count is the number of particles.
  573. Each row contains the (x, y) coordinates of a gravel particle.
  574. """
  575. x_min, y_min, x_max, y_max = rectangular_roi
  576. area = abs((x_max - x_min) * (y_max - y_min))
  577. count = area // 10
  578. gravels = np.empty([count, 2], dtype=np.int64)
  579. gravels[:, 0] = self.random_generator.integers(x_min, x_max, count)
  580. gravels[:, 1] = self.random_generator.integers(y_min, y_max, count)
  581. return gravels
  582. def apply(
  583. self,
  584. img: np.ndarray,
  585. gravels_infos: list[Any],
  586. **params: Any,
  587. ) -> np.ndarray:
  588. """Apply the gravel effect to the input image.
  589. Args:
  590. img (np.ndarray): The input image to apply the gravel effect to.
  591. gravels_infos (list[Any]): Information about the gravel particles.
  592. **params (Any): Additional parameters (not used in this transform).
  593. Returns:
  594. np.ndarray: The image with the applied gravel effect.
  595. """
  596. return fpixel.add_gravel(img, gravels_infos)
  597. def get_params_dependent_on_data(
  598. self,
  599. params: dict[str, Any],
  600. data: dict[str, Any],
  601. ) -> dict[str, np.ndarray]:
  602. """Generate parameters dependent on the input data.
  603. Args:
  604. params (dict[str, Any]): Parameters from the previous transform.
  605. data (dict[str, Any]): Input data.
  606. Returns:
  607. dict[str, np.ndarray]: Dictionary with the following keys:
  608. - "gravels_infos" (np.ndarray): Information about the gravel particles.
  609. """
  610. height, width = params["shape"][:2]
  611. # Calculate ROI in pixels
  612. x_min, y_min, x_max, y_max = (
  613. int(coord * dim) for coord, dim in zip(self.gravel_roi, [width, height, width, height])
  614. )
  615. roi_width = x_max - x_min
  616. roi_height = y_max - y_min
  617. gravels_info = []
  618. for _ in range(self.number_of_patches):
  619. # Generate a random rectangular region within the ROI
  620. patch_width = self.py_random.randint(roi_width // 10, roi_width // 5)
  621. patch_height = self.py_random.randint(roi_height // 10, roi_height // 5)
  622. patch_x = self.py_random.randint(x_min, x_max - patch_width)
  623. patch_y = self.py_random.randint(y_min, y_max - patch_height)
  624. # Generate gravel particles within this patch
  625. num_particles = (patch_width * patch_height) // 100 # Adjust this divisor to control density
  626. for _ in range(num_particles):
  627. x = self.py_random.randint(patch_x, patch_x + patch_width)
  628. y = self.py_random.randint(patch_y, patch_y + patch_height)
  629. r = self.py_random.randint(1, 3)
  630. sat = self.py_random.randint(0, 255)
  631. gravels_info.append(
  632. [
  633. max(y - r, 0), # min_y
  634. min(y + r, height - 1), # max_y
  635. max(x - r, 0), # min_x
  636. min(x + r, width - 1), # max_x
  637. sat, # saturation
  638. ],
  639. )
  640. return {"gravels_infos": np.array(gravels_info, dtype=np.int64)}
  641. class RandomRain(ImageOnlyTransform):
  642. """Adds rain effects to an image.
  643. This transform simulates rainfall by overlaying semi-transparent streaks onto the image,
  644. creating a realistic rain effect. It can be used to augment datasets for computer vision
  645. tasks that need to perform well in rainy conditions.
  646. Args:
  647. slant_range (tuple[float, float]): Range for the rain slant angle in degrees.
  648. Negative values slant to the left, positive to the right. Default: (-10, 10).
  649. drop_length (int | None): Length of the rain drops in pixels.
  650. If None, drop length will be automatically calculated as height // 8.
  651. This allows the rain effect to scale with the image size.
  652. Default: None
  653. drop_width (int): Width of the rain drops in pixels. Default: 1.
  654. drop_color (tuple[int, int, int]): Color of the rain drops in RGB format. Default: (200, 200, 200).
  655. blur_value (int): Blur value for simulating rain effect. Rainy views are typically blurry. Default: 7.
  656. brightness_coefficient (float): Coefficient to adjust the brightness of the image.
  657. Rainy scenes are usually darker. Should be in the range (0, 1]. Default: 0.7.
  658. rain_type (Literal["drizzle", "heavy", "torrential", "default"]): Type of rain to simulate.
  659. p (float): Probability of applying the transform. Default: 0.5.
  660. Targets:
  661. image
  662. Image types:
  663. uint8, float32
  664. Number of channels:
  665. 3
  666. Note:
  667. - The rain effect is created by drawing semi-transparent lines on the image.
  668. - The slant of the rain can be controlled to simulate wind effects.
  669. - Different rain types (drizzle, heavy, torrential) adjust the density and appearance of the rain.
  670. - The transform also adjusts image brightness and applies a blur to simulate the visual effects of rain.
  671. - This transform is particularly useful for:
  672. * Augmenting datasets for autonomous driving in rainy conditions
  673. * Testing the robustness of computer vision models to weather effects
  674. * Creating realistic rainy scenes for image editing or film production
  675. Mathematical Formulation:
  676. For each raindrop:
  677. 1. Start position (x1, y1) is randomly generated within the image.
  678. 2. End position (x2, y2) is calculated based on drop_length and slant:
  679. x2 = x1 + drop_length * sin(slant)
  680. y2 = y1 + drop_length * cos(slant)
  681. 3. A line is drawn from (x1, y1) to (x2, y2) with the specified drop_color and drop_width.
  682. 4. The image is then blurred and its brightness is adjusted.
  683. Examples:
  684. >>> import numpy as np
  685. >>> import albumentations as A
  686. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  687. # Default usage
  688. >>> transform = A.RandomRain(p=1.0)
  689. >>> rainy_image = transform(image=image)["image"]
  690. # Custom rain parameters
  691. >>> transform = A.RandomRain(
  692. ... slant_range=(-15, 15),
  693. ... drop_length=30,
  694. ... drop_width=2,
  695. ... drop_color=(180, 180, 180),
  696. ... blur_value=5,
  697. ... brightness_coefficient=0.8,
  698. ... p=1.0
  699. ... )
  700. >>> rainy_image = transform(image=image)["image"]
  701. # Simulating heavy rain
  702. >>> transform = A.RandomRain(rain_type="heavy", p=1.0)
  703. >>> heavy_rain_image = transform(image=image)["image"]
  704. References:
  705. - Rain visualization techniques: https://developer.nvidia.com/gpugems/gpugems3/part-iv-image-effects/chapter-27-real-time-rain-rendering
  706. - Weather effects in computer vision: https://www.sciencedirect.com/science/article/pii/S1077314220300692
  707. """
  708. class InitSchema(BaseTransformInitSchema):
  709. slant_range: Annotated[
  710. tuple[float, float],
  711. AfterValidator(nondecreasing),
  712. AfterValidator(check_range_bounds(-MAX_RAIN_ANGLE, MAX_RAIN_ANGLE)),
  713. ]
  714. drop_length: int | None
  715. drop_width: int = Field(ge=1)
  716. drop_color: tuple[int, int, int]
  717. blur_value: int = Field(ge=1)
  718. brightness_coefficient: float = Field(gt=0, le=1)
  719. rain_type: Literal["drizzle", "heavy", "torrential", "default"]
  720. def __init__(
  721. self,
  722. slant_range: tuple[float, float] = (-10, 10),
  723. drop_length: int | None = None,
  724. drop_width: int = 1,
  725. drop_color: tuple[int, int, int] = (200, 200, 200),
  726. blur_value: int = 7,
  727. brightness_coefficient: float = 0.7,
  728. rain_type: Literal["drizzle", "heavy", "torrential", "default"] = "default",
  729. p: float = 0.5,
  730. ):
  731. super().__init__(p=p)
  732. self.slant_range = slant_range
  733. self.drop_length = drop_length
  734. self.drop_width = drop_width
  735. self.drop_color = drop_color
  736. self.blur_value = blur_value
  737. self.brightness_coefficient = brightness_coefficient
  738. self.rain_type = rain_type
  739. def apply(
  740. self,
  741. img: np.ndarray,
  742. slant: float,
  743. drop_length: int,
  744. rain_drops: np.ndarray,
  745. **params: Any,
  746. ) -> np.ndarray:
  747. """Apply the rain effect to the input image.
  748. Args:
  749. img (np.ndarray): The input image to apply the rain effect to.
  750. slant (float): The slant angle of the rain.
  751. drop_length (int): The length of the rain drops.
  752. rain_drops (np.ndarray): The coordinates of the rain drops.
  753. **params (Any): Additional parameters (not used in this transform).
  754. Returns:
  755. np.ndarray: The image with the applied rain effect.
  756. """
  757. non_rgb_error(img)
  758. return fpixel.add_rain(
  759. img,
  760. slant,
  761. drop_length,
  762. self.drop_width,
  763. self.drop_color,
  764. self.blur_value,
  765. self.brightness_coefficient,
  766. rain_drops,
  767. )
  768. def get_params_dependent_on_data(
  769. self,
  770. params: dict[str, Any],
  771. data: dict[str, Any],
  772. ) -> dict[str, Any]:
  773. """Generate parameters dependent on the input data.
  774. Args:
  775. params (dict[str, Any]): Parameters from the previous transform.
  776. data (dict[str, Any]): Input data.
  777. Returns:
  778. dict[str, Any]: Dictionary with the following keys:
  779. - "drop_length" (int): The length of the rain drops.
  780. - "slant" (float): The slant angle of the rain.
  781. - "rain_drops" (np.ndarray): The coordinates of the rain drops.
  782. """
  783. height, width = params["shape"][:2]
  784. # Simpler calculations, directly following Kornia
  785. if self.rain_type == "drizzle":
  786. num_drops = height // 4
  787. elif self.rain_type == "heavy":
  788. num_drops = height
  789. elif self.rain_type == "torrential":
  790. num_drops = height * 2
  791. else:
  792. num_drops = height // 3
  793. drop_length = max(1, height // 8) if self.drop_length is None else self.drop_length
  794. # Simplified slant calculation
  795. slant = self.py_random.uniform(*self.slant_range)
  796. # Single random call for all coordinates
  797. if num_drops > 0:
  798. # Generate all coordinates in one call
  799. coords = self.random_generator.integers(
  800. low=[0, 0],
  801. high=[width, height - drop_length],
  802. size=(num_drops, 2),
  803. dtype=np.int32,
  804. )
  805. rain_drops = coords
  806. else:
  807. rain_drops = np.empty((0, 2), dtype=np.int32)
  808. return {"drop_length": drop_length, "slant": slant, "rain_drops": rain_drops}
  809. class RandomFog(ImageOnlyTransform):
  810. """Simulates fog for the image by adding random fog-like artifacts.
  811. This transform creates a fog effect by generating semi-transparent overlays
  812. that mimic the visual characteristics of fog. The fog intensity and distribution
  813. can be controlled to create various fog-like conditions. An image size dependent
  814. Gaussian blur is applied to the resulting image
  815. Args:
  816. fog_coef_range (tuple[float, float]): Range for fog intensity coefficient. Should be in [0, 1] range.
  817. alpha_coef (float): Transparency of the fog circles. Should be in [0, 1] range. Default: 0.08.
  818. p (float): Probability of applying the transform. Default: 0.5.
  819. Targets:
  820. image
  821. Image types:
  822. uint8, float32
  823. Number of channels:
  824. 3
  825. Note:
  826. - The fog effect is created by overlaying semi-transparent circles on the image.
  827. - Higher fog coefficient values result in denser fog effects.
  828. - The fog is typically denser in the center of the image and gradually decreases towards the edges.
  829. - Image is blurred to decrease the sharpness
  830. - This transform is useful for:
  831. * Simulating various weather conditions in outdoor scenes
  832. * Data augmentation for improving model robustness to foggy conditions
  833. * Creating atmospheric effects in image editing
  834. Mathematical Formulation:
  835. For each fog particle:
  836. 1. A position (x, y) is randomly generated within the image.
  837. 2. A circle with random radius is drawn at this position.
  838. 3. The circle's alpha (transparency) is determined by the alpha_coef.
  839. 4. These circles are overlaid on the original image to create the fog effect.
  840. 5. A Gaussian blur dependent on the shorter dimension is applied
  841. The final pixel value is calculated as:
  842. output = blur((1 - alpha) * original_pixel + alpha * fog_color)
  843. where alpha is influenced by the fog_coef and alpha_coef parameters.
  844. Examples:
  845. >>> import numpy as np
  846. >>> import albumentations as A
  847. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  848. # Default usage
  849. >>> transform = A.RandomFog(p=1.0)
  850. >>> foggy_image = transform(image=image)["image"]
  851. # Custom fog intensity range
  852. >>> transform = A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.8, p=1.0)
  853. >>> foggy_image = transform(image=image)["image"]
  854. # Adjust fog transparency
  855. >>> transform = A.RandomFog(fog_coef_lower=0.2, fog_coef_upper=0.5, alpha_coef=0.1, p=1.0)
  856. >>> foggy_image = transform(image=image)["image"]
  857. References:
  858. - Fog: https://en.wikipedia.org/wiki/Fog
  859. - Atmospheric perspective: https://en.wikipedia.org/wiki/Aerial_perspective
  860. """
  861. class InitSchema(BaseTransformInitSchema):
  862. fog_coef_range: Annotated[
  863. tuple[float, float],
  864. AfterValidator(check_range_bounds(0, 1)),
  865. AfterValidator(nondecreasing),
  866. ]
  867. alpha_coef: float = Field(ge=0, le=1)
  868. def __init__(
  869. self,
  870. alpha_coef: float = 0.08,
  871. fog_coef_range: tuple[float, float] = (0.3, 1),
  872. p: float = 0.5,
  873. ):
  874. super().__init__(p=p)
  875. self.fog_coef_range = fog_coef_range
  876. self.alpha_coef = alpha_coef
  877. def apply(
  878. self,
  879. img: np.ndarray,
  880. particle_positions: list[tuple[int, int]],
  881. radiuses: list[int],
  882. intensity: float,
  883. **params: Any,
  884. ) -> np.ndarray:
  885. """Apply the fog effect to the input image.
  886. Args:
  887. img (np.ndarray): The input image to apply the fog effect to.
  888. particle_positions (list[tuple[int, int]]): The coordinates of the fog particles.
  889. radiuses (list[int]): The radii of the fog particles.
  890. intensity (float): The intensity of the fog.
  891. **params (Any): Additional parameters (not used in this transform).
  892. Returns:
  893. np.ndarray: The image with the applied fog effect.
  894. """
  895. non_rgb_error(img)
  896. return fpixel.add_fog(
  897. img,
  898. intensity,
  899. self.alpha_coef,
  900. particle_positions,
  901. radiuses,
  902. )
  903. def get_params_dependent_on_data(
  904. self,
  905. params: dict[str, Any],
  906. data: dict[str, Any],
  907. ) -> dict[str, Any]:
  908. """Generate parameters dependent on the input data.
  909. Args:
  910. params (dict[str, Any]): Parameters from the previous transform.
  911. data (dict[str, Any]): Input data.
  912. Returns:
  913. dict[str, Any]: Dictionary with the following keys:
  914. - "intensity" (float): The intensity of the fog.
  915. - "particle_positions" (list[tuple[int, int]]): The coordinates of the fog particles.
  916. - "radiuses" (list[int]): The radii of the fog particles.
  917. """
  918. # Select a random fog intensity within the specified range
  919. intensity = self.py_random.uniform(*self.fog_coef_range)
  920. image_shape = params["shape"][:2]
  921. image_height, image_width = image_shape
  922. # Calculate the size of the fog effect region based on image width and fog intensity
  923. fog_region_size = max(1, int(image_width // 3 * intensity))
  924. particle_positions = []
  925. # Initialize the central region where fog will be most dense
  926. center_x, center_y = (int(x) for x in fgeometric.center(image_shape))
  927. # Define the initial size of the foggy area
  928. current_width = image_width
  929. current_height = image_height
  930. # Define shrink factor for reducing the foggy area each iteration
  931. shrink_factor = 0.1
  932. max_iterations = 10 # Prevent infinite loop
  933. iteration = 0
  934. while current_width > fog_region_size and current_height > fog_region_size and iteration < max_iterations:
  935. # Calculate the number of particles for this region
  936. area = current_width * current_height
  937. particles_in_region = int(
  938. area / (fog_region_size * fog_region_size) * intensity * 10,
  939. )
  940. for _ in range(particles_in_region):
  941. # Generate random positions within the current region
  942. x = self.py_random.randint(
  943. center_x - current_width // 2,
  944. center_x + current_width // 2,
  945. )
  946. y = self.py_random.randint(
  947. center_y - current_height // 2,
  948. center_y + current_height // 2,
  949. )
  950. particle_positions.append((x, y))
  951. # Shrink the region for the next iteration
  952. current_width = int(current_width * (1 - shrink_factor))
  953. current_height = int(current_height * (1 - shrink_factor))
  954. iteration += 1
  955. radiuses = fpixel.get_fog_particle_radiuses(
  956. image_shape,
  957. len(particle_positions),
  958. intensity,
  959. self.random_generator,
  960. )
  961. return {
  962. "particle_positions": particle_positions,
  963. "intensity": intensity,
  964. "radiuses": radiuses,
  965. }
  966. class RandomSunFlare(ImageOnlyTransform):
  967. """Simulates a sun flare effect on the image by adding circles of light.
  968. This transform creates a sun flare effect by overlaying multiple semi-transparent
  969. circles of varying sizes and intensities along a line originating from a "sun" point.
  970. It offers two methods: a simple overlay technique and a more complex physics-based approach.
  971. Args:
  972. flare_roi (tuple[float, float, float, float]): Region of interest where the sun flare
  973. can appear. Values are in the range [0, 1] and represent (x_min, y_min, x_max, y_max)
  974. in relative coordinates. Default: (0, 0, 1, 0.5).
  975. angle_range (tuple[float, float]): Range of angles (in radians) for the flare direction.
  976. Values should be in the range [0, 1], where 0 represents 0 radians and 1 represents 2π radians.
  977. Default: (0, 1).
  978. num_flare_circles_range (tuple[int, int]): Range for the number of flare circles to generate.
  979. Default: (6, 10).
  980. src_radius (int): Radius of the sun circle in pixels. Default: 400.
  981. src_color (tuple[int, int, int]): Color of the sun in RGB format. Default: (255, 255, 255).
  982. method (Literal["overlay", "physics_based"]): Method to use for generating the sun flare.
  983. "overlay" uses a simple alpha blending technique, while "physics_based" simulates
  984. more realistic optical phenomena. Default: "overlay".
  985. p (float): Probability of applying the transform. Default: 0.5.
  986. Targets:
  987. image
  988. Image types:
  989. uint8, float32
  990. Number of channels:
  991. 3
  992. Note:
  993. The transform offers two methods for generating sun flares:
  994. 1. Overlay Method ("overlay"):
  995. - Creates a simple sun flare effect using basic alpha blending.
  996. - Steps:
  997. a. Generate the main sun circle with a radial gradient.
  998. b. Create smaller flare circles along the flare line.
  999. c. Blend these elements with the original image using alpha compositing.
  1000. - Characteristics:
  1001. * Faster computation
  1002. * Less realistic appearance
  1003. * Suitable for basic augmentation or when performance is a priority
  1004. 2. Physics-based Method ("physics_based"):
  1005. - Simulates more realistic optical phenomena observed in actual lens flares.
  1006. - Steps:
  1007. a. Create a separate flare layer for complex manipulations.
  1008. b. Add the main sun circle and diffraction spikes to simulate light diffraction.
  1009. c. Generate and add multiple flare circles with varying properties.
  1010. d. Apply Gaussian blur to create a soft, glowing effect.
  1011. e. Create and apply a radial gradient mask for natural fading from the center.
  1012. f. Simulate chromatic aberration by applying different blurs to color channels.
  1013. g. Blend the flare with the original image using screen blending mode.
  1014. - Characteristics:
  1015. * More computationally intensive
  1016. * Produces more realistic and visually appealing results
  1017. * Includes effects like diffraction spikes and chromatic aberration
  1018. * Suitable for high-quality augmentation or realistic image synthesis
  1019. Mathematical Formulation:
  1020. For both methods:
  1021. 1. Sun position (x_s, y_s) is randomly chosen within the specified ROI.
  1022. 2. Flare angle θ is randomly chosen from the angle_range.
  1023. 3. For each flare circle i:
  1024. - Position (x_i, y_i) = (x_s + t_i * cos(θ), y_s + t_i * sin(θ))
  1025. where t_i is a random distance along the flare line.
  1026. - Radius r_i is randomly chosen, with larger circles closer to the sun.
  1027. - Alpha (transparency) alpha_i is randomly chosen in the range [0.05, 0.2].
  1028. - Color (R_i, G_i, B_i) is randomly chosen close to src_color.
  1029. Overlay method blending:
  1030. new_pixel = (1 - alpha_i) * original_pixel + alpha_i * flare_color_i
  1031. Physics-based method blending:
  1032. new_pixel = 255 - ((255 - original_pixel) * (255 - flare_pixel) / 255)
  1033. 4. Each flare circle is blended with the image using alpha compositing:
  1034. new_pixel = (1 - alpha_i) * original_pixel + alpha_i * flare_color_i
  1035. Examples:
  1036. >>> import numpy as np
  1037. >>> import albumentations as A
  1038. >>> image = np.random.randint(0, 256, [1000, 1000, 3], dtype=np.uint8)
  1039. # Default sun flare (overlay method)
  1040. >>> transform = A.RandomSunFlare(p=1.0)
  1041. >>> flared_image = transform(image=image)["image"]
  1042. # Physics-based sun flare with custom parameters
  1043. # Default sun flare
  1044. >>> transform = A.RandomSunFlare(p=1.0)
  1045. >>> flared_image = transform(image=image)["image"]
  1046. # Custom sun flare parameters
  1047. >>> transform = A.RandomSunFlare(
  1048. ... flare_roi=(0.1, 0, 0.9, 0.3),
  1049. ... angle_range=(0.25, 0.75),
  1050. ... num_flare_circles_range=(5, 15),
  1051. ... src_radius=200,
  1052. ... src_color=(255, 200, 100),
  1053. ... method="physics_based",
  1054. ... p=1.0
  1055. ... )
  1056. >>> flared_image = transform(image=image)["image"]
  1057. References:
  1058. - Lens flare: https://en.wikipedia.org/wiki/Lens_flare
  1059. - Alpha compositing: https://en.wikipedia.org/wiki/Alpha_compositing
  1060. - Diffraction: https://en.wikipedia.org/wiki/Diffraction
  1061. - Chromatic aberration: https://en.wikipedia.org/wiki/Chromatic_aberration
  1062. - Screen blending: https://en.wikipedia.org/wiki/Blend_modes#Screen
  1063. """
  1064. class InitSchema(BaseTransformInitSchema):
  1065. flare_roi: tuple[float, float, float, float]
  1066. src_radius: int = Field(gt=1)
  1067. src_color: tuple[int, ...]
  1068. angle_range: Annotated[
  1069. tuple[float, float],
  1070. AfterValidator(check_range_bounds(0, 1)),
  1071. AfterValidator(nondecreasing),
  1072. ]
  1073. num_flare_circles_range: Annotated[
  1074. tuple[int, int],
  1075. AfterValidator(check_range_bounds(1, None)),
  1076. AfterValidator(nondecreasing),
  1077. ]
  1078. method: Literal["overlay", "physics_based"]
  1079. @model_validator(mode="after")
  1080. def _validate_parameters(self) -> Self:
  1081. (
  1082. flare_center_lower_x,
  1083. flare_center_lower_y,
  1084. flare_center_upper_x,
  1085. flare_center_upper_y,
  1086. ) = self.flare_roi
  1087. if (
  1088. not 0 <= flare_center_lower_x < flare_center_upper_x <= 1
  1089. or not 0 <= flare_center_lower_y < flare_center_upper_y <= 1
  1090. ):
  1091. raise ValueError(f"Invalid flare_roi. Got: {self.flare_roi}")
  1092. return self
  1093. def __init__(
  1094. self,
  1095. flare_roi: tuple[float, float, float, float] = (0, 0, 1, 0.5),
  1096. src_radius: int = 400,
  1097. src_color: tuple[int, ...] = (255, 255, 255),
  1098. angle_range: tuple[float, float] = (0, 1),
  1099. num_flare_circles_range: tuple[int, int] = (6, 10),
  1100. method: Literal["overlay", "physics_based"] = "overlay",
  1101. p: float = 0.5,
  1102. ):
  1103. super().__init__(p=p)
  1104. self.angle_range = angle_range
  1105. self.num_flare_circles_range = num_flare_circles_range
  1106. self.src_radius = src_radius
  1107. self.src_color = src_color
  1108. self.flare_roi = flare_roi
  1109. self.method = method
  1110. def apply(
  1111. self,
  1112. img: np.ndarray,
  1113. flare_center: tuple[float, float],
  1114. circles: list[Any],
  1115. **params: Any,
  1116. ) -> np.ndarray:
  1117. """Apply the sun flare effect to the input image.
  1118. Args:
  1119. img (np.ndarray): The input image to apply the sun flare effect to.
  1120. flare_center (tuple[float, float]): The center of the sun.
  1121. circles (list[Any]): The circles to apply the sun flare effect to.
  1122. **params (Any): Additional parameters (not used in this transform).
  1123. Returns:
  1124. np.ndarray: The image with the applied sun flare effect.
  1125. """
  1126. non_rgb_error(img)
  1127. if self.method == "overlay":
  1128. return fpixel.add_sun_flare_overlay(
  1129. img,
  1130. flare_center,
  1131. self.src_radius,
  1132. self.src_color,
  1133. circles,
  1134. )
  1135. if self.method == "physics_based":
  1136. return fpixel.add_sun_flare_physics_based(
  1137. img,
  1138. flare_center,
  1139. self.src_radius,
  1140. self.src_color,
  1141. circles,
  1142. )
  1143. raise ValueError(f"Invalid method: {self.method}")
  1144. def get_params_dependent_on_data(
  1145. self,
  1146. params: dict[str, Any],
  1147. data: dict[str, Any],
  1148. ) -> dict[str, Any]:
  1149. """Generate parameters dependent on the input data.
  1150. Args:
  1151. params (dict[str, Any]): Parameters from the previous transform.
  1152. data (dict[str, Any]): Input data.
  1153. Returns:
  1154. dict[str, Any]: Dictionary with the following keys:
  1155. - "circles" (list[Any]): The circles to apply the sun flare effect to.
  1156. - "flare_center" (tuple[float, float]): The center of the sun.
  1157. """
  1158. height, width = params["shape"][:2]
  1159. diagonal = math.sqrt(height**2 + width**2)
  1160. angle = 2 * math.pi * self.py_random.uniform(*self.angle_range)
  1161. # Calculate flare center in pixel coordinates
  1162. x_min, y_min, x_max, y_max = self.flare_roi
  1163. flare_center_x = int(width * self.py_random.uniform(x_min, x_max))
  1164. flare_center_y = int(height * self.py_random.uniform(y_min, y_max))
  1165. num_circles = self.py_random.randint(*self.num_flare_circles_range)
  1166. # Calculate parameters relative to image size
  1167. step_size = max(1, int(diagonal * 0.01)) # 1% of diagonal, minimum 1 pixel
  1168. max_radius = max(2, int(height * 0.01)) # 1% of height, minimum 2 pixels
  1169. color_range = int(max(self.src_color) * 0.2) # 20% of max color value
  1170. def line(t: float) -> tuple[float, float]:
  1171. return (
  1172. flare_center_x + t * math.cos(angle),
  1173. flare_center_y + t * math.sin(angle),
  1174. )
  1175. # Generate points along the flare line
  1176. t_range = range(-flare_center_x, width - flare_center_x, step_size)
  1177. points = [line(t) for t in t_range]
  1178. circles = []
  1179. for _ in range(num_circles):
  1180. alpha = self.py_random.uniform(0.05, 0.2)
  1181. point = self.py_random.choice(points)
  1182. rad = self.py_random.randint(1, max_radius)
  1183. # Generate colors relative to src_color
  1184. colors = [self.py_random.randint(max(c - color_range, 0), c) for c in self.src_color]
  1185. circles.append(
  1186. (
  1187. alpha,
  1188. (int(point[0]), int(point[1])),
  1189. pow(rad, 3),
  1190. tuple(colors),
  1191. ),
  1192. )
  1193. return {
  1194. "circles": circles,
  1195. "flare_center": (flare_center_x, flare_center_y),
  1196. }
  1197. class RandomShadow(ImageOnlyTransform):
  1198. """Simulates shadows for the image by reducing the brightness of the image in shadow regions.
  1199. This transform adds realistic shadow effects to images, which can be useful for augmenting
  1200. datasets for outdoor scene analysis, autonomous driving, or any computer vision task where
  1201. shadows may be present.
  1202. Args:
  1203. shadow_roi (tuple[float, float, float, float]): Region of the image where shadows
  1204. will appear (x_min, y_min, x_max, y_max). All values should be in range [0, 1].
  1205. Default: (0, 0.5, 1, 1).
  1206. num_shadows_limit (tuple[int, int]): Lower and upper limits for the possible number of shadows.
  1207. Default: (1, 2).
  1208. shadow_dimension (int): Number of edges in the shadow polygons. Default: 5.
  1209. shadow_intensity_range (tuple[float, float]): Range for the shadow intensity. Larger value
  1210. means darker shadow. Should be two float values between 0 and 1. Default: (0.5, 0.5).
  1211. p (float): Probability of applying the transform. Default: 0.5.
  1212. Targets:
  1213. image
  1214. Image types:
  1215. uint8, float32
  1216. Number of channels:
  1217. Any
  1218. Note:
  1219. - Shadows are created by generating random polygons within the specified ROI and
  1220. reducing the brightness of the image in these areas.
  1221. - The number of shadows, their shapes, and intensities can be randomized for variety.
  1222. - This transform is particularly useful for:
  1223. * Augmenting datasets for outdoor scene understanding
  1224. * Improving robustness of object detection models to shadowed conditions
  1225. * Simulating different lighting conditions in synthetic datasets
  1226. Mathematical Formulation:
  1227. For each shadow:
  1228. 1. A polygon with `shadow_dimension` vertices is generated within the shadow ROI.
  1229. 2. The shadow intensity a is randomly chosen from `shadow_intensity_range`.
  1230. 3. For each pixel (x, y) within the polygon:
  1231. new_pixel_value = original_pixel_value * (1 - a)
  1232. Examples:
  1233. >>> import numpy as np
  1234. >>> import albumentations as A
  1235. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  1236. # Default usage
  1237. >>> transform = A.RandomShadow(p=1.0)
  1238. >>> shadowed_image = transform(image=image)["image"]
  1239. # Custom shadow parameters
  1240. >>> transform = A.RandomShadow(
  1241. ... shadow_roi=(0.2, 0.2, 0.8, 0.8),
  1242. ... num_shadows_limit=(2, 4),
  1243. ... shadow_dimension=8,
  1244. ... shadow_intensity_range=(0.3, 0.7),
  1245. ... p=1.0
  1246. ... )
  1247. >>> shadowed_image = transform(image=image)["image"]
  1248. # Combining with other transforms
  1249. >>> transform = A.Compose([
  1250. ... A.RandomShadow(p=0.5),
  1251. ... A.RandomBrightnessContrast(p=0.5),
  1252. ... ])
  1253. >>> augmented_image = transform(image=image)["image"]
  1254. References:
  1255. - Shadow detection and removal: https://www.sciencedirect.com/science/article/pii/S1047320315002035
  1256. - Shadows in computer vision: https://en.wikipedia.org/wiki/Shadow_detection
  1257. """
  1258. class InitSchema(BaseTransformInitSchema):
  1259. shadow_roi: tuple[float, float, float, float]
  1260. num_shadows_limit: Annotated[
  1261. tuple[int, int],
  1262. AfterValidator(check_range_bounds(1, None)),
  1263. AfterValidator(nondecreasing),
  1264. ]
  1265. shadow_dimension: int = Field(ge=3)
  1266. shadow_intensity_range: Annotated[
  1267. tuple[float, float],
  1268. AfterValidator(check_range_bounds(0, 1)),
  1269. AfterValidator(nondecreasing),
  1270. ]
  1271. @model_validator(mode="after")
  1272. def _validate_shadows(self) -> Self:
  1273. shadow_lower_x, shadow_lower_y, shadow_upper_x, shadow_upper_y = self.shadow_roi
  1274. if not 0 <= shadow_lower_x <= shadow_upper_x <= 1 or not 0 <= shadow_lower_y <= shadow_upper_y <= 1:
  1275. raise ValueError(f"Invalid shadow_roi. Got: {self.shadow_roi}")
  1276. return self
  1277. def __init__(
  1278. self,
  1279. shadow_roi: tuple[float, float, float, float] = (0, 0.5, 1, 1),
  1280. num_shadows_limit: tuple[int, int] = (1, 2),
  1281. shadow_dimension: int = 5,
  1282. shadow_intensity_range: tuple[float, float] = (0.5, 0.5),
  1283. p: float = 0.5,
  1284. ):
  1285. super().__init__(p=p)
  1286. self.shadow_roi = shadow_roi
  1287. self.shadow_dimension = shadow_dimension
  1288. self.num_shadows_limit = num_shadows_limit
  1289. self.shadow_intensity_range = shadow_intensity_range
  1290. def apply(
  1291. self,
  1292. img: np.ndarray,
  1293. vertices_list: list[np.ndarray],
  1294. intensities: np.ndarray,
  1295. **params: Any,
  1296. ) -> np.ndarray:
  1297. """Apply the shadow effect to the input image.
  1298. Args:
  1299. img (np.ndarray): The input image to apply the shadow effect to.
  1300. vertices_list (list[np.ndarray]): The vertices of the shadow polygons.
  1301. intensities (np.ndarray): The intensities of the shadows.
  1302. **params (Any): Additional parameters (not used in this transform).
  1303. Returns:
  1304. np.ndarray: The image with the applied shadow effect.
  1305. """
  1306. return fpixel.add_shadow(img, vertices_list, intensities)
  1307. def get_params_dependent_on_data(
  1308. self,
  1309. params: dict[str, Any],
  1310. data: dict[str, Any],
  1311. ) -> dict[str, list[np.ndarray]]:
  1312. """Generate parameters dependent on the input data.
  1313. Args:
  1314. params (dict[str, Any]): Parameters from the previous transform.
  1315. data (dict[str, Any]): Input data.
  1316. Returns:
  1317. dict[str, list[np.ndarray]]: Dictionary with the following keys:
  1318. - "vertices_list" (list[np.ndarray]): The vertices of the shadow polygons.
  1319. - "intensities" (np.ndarray): The intensities of the shadows.
  1320. """
  1321. height, width = params["shape"][:2]
  1322. num_shadows = self.py_random.randint(*self.num_shadows_limit)
  1323. x_min, y_min, x_max, y_max = self.shadow_roi
  1324. x_min = int(x_min * width)
  1325. x_max = int(x_max * width)
  1326. y_min = int(y_min * height)
  1327. y_max = int(y_max * height)
  1328. vertices_list = [
  1329. np.stack(
  1330. [
  1331. self.random_generator.integers(
  1332. x_min,
  1333. x_max,
  1334. size=self.shadow_dimension,
  1335. ),
  1336. self.random_generator.integers(
  1337. y_min,
  1338. y_max,
  1339. size=self.shadow_dimension,
  1340. ),
  1341. ],
  1342. axis=1,
  1343. )
  1344. for _ in range(num_shadows)
  1345. ]
  1346. # Sample shadow intensity for each shadow
  1347. intensities = self.random_generator.uniform(
  1348. *self.shadow_intensity_range,
  1349. size=num_shadows,
  1350. )
  1351. return {"vertices_list": vertices_list, "intensities": intensities}
  1352. class RandomToneCurve(ImageOnlyTransform):
  1353. """Randomly change the relationship between bright and dark areas of the image by manipulating its tone curve.
  1354. This transform applies a random S-curve to the image's tone curve, adjusting the brightness and contrast
  1355. in a non-linear manner. It can be applied to the entire image or to each channel separately.
  1356. Args:
  1357. scale (float): Standard deviation of the normal distribution used to sample random distances
  1358. to move two control points that modify the image's curve. Values should be in range [0, 1].
  1359. Higher values will result in more dramatic changes to the image. Default: 0.1
  1360. per_channel (bool): If True, the tone curve will be applied to each channel of the input image separately,
  1361. which can lead to color distortion. If False, the same curve is applied to all channels,
  1362. preserving the original color relationships. Default: False
  1363. p (float): Probability of applying the transform. Default: 0.5
  1364. Targets:
  1365. image
  1366. Image types:
  1367. uint8, float32
  1368. Number of channels:
  1369. Any
  1370. Note:
  1371. - This transform modifies the image's histogram by applying a smooth, S-shaped curve to it.
  1372. - The S-curve is defined by moving two control points of a quadratic Bézier curve.
  1373. - When per_channel is False, the same curve is applied to all channels, maintaining color balance.
  1374. - When per_channel is True, different curves are applied to each channel, which can create color shifts.
  1375. - This transform can be used to adjust image contrast and brightness in a more natural way than linear
  1376. transforms.
  1377. - The effect can range from subtle contrast adjustments to more dramatic "vintage" or "faded" looks.
  1378. Mathematical Formulation:
  1379. 1. Two control points are randomly moved from their default positions (0.25, 0.25) and (0.75, 0.75).
  1380. 2. The new positions are sampled from a normal distribution: N(μ, σ²), where μ is the original position
  1381. and alpha is the scale parameter.
  1382. 3. These points, along with fixed points at (0, 0) and (1, 1), define a quadratic Bézier curve.
  1383. 4. The curve is applied as a lookup table to the image intensities:
  1384. new_intensity = curve(original_intensity)
  1385. Examples:
  1386. >>> import numpy as np
  1387. >>> import albumentations as A
  1388. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1389. # Apply a random tone curve to all channels together
  1390. >>> transform = A.RandomToneCurve(scale=0.1, per_channel=False, p=1.0)
  1391. >>> augmented_image = transform(image=image)['image']
  1392. # Apply random tone curves to each channel separately
  1393. >>> transform = A.RandomToneCurve(scale=0.2, per_channel=True, p=1.0)
  1394. >>> augmented_image = transform(image=image)['image']
  1395. References:
  1396. - "What Else Can Fool Deep Learning? Addressing Color Constancy Errors on Deep Neural Network Performance":
  1397. https://arxiv.org/abs/1912.06960
  1398. - Bézier curve: https://en.wikipedia.org/wiki/B%C3%A9zier_curve#Quadratic_B%C3%A9zier_curves
  1399. - Tone mapping: https://en.wikipedia.org/wiki/Tone_mapping
  1400. """
  1401. class InitSchema(BaseTransformInitSchema):
  1402. scale: float = Field(
  1403. ge=0,
  1404. le=1,
  1405. )
  1406. per_channel: bool
  1407. def __init__(
  1408. self,
  1409. scale: float = 0.1,
  1410. per_channel: bool = False,
  1411. p: float = 0.5,
  1412. ):
  1413. super().__init__(p=p)
  1414. self.scale = scale
  1415. self.per_channel = per_channel
  1416. def apply(
  1417. self,
  1418. img: np.ndarray,
  1419. low_y: float | np.ndarray,
  1420. high_y: float | np.ndarray,
  1421. **params: Any,
  1422. ) -> np.ndarray:
  1423. """Apply the tone curve to the input image.
  1424. Args:
  1425. img (np.ndarray): The input image to apply the tone curve to.
  1426. low_y (float | np.ndarray): The lower control point of the tone curve.
  1427. high_y (float | np.ndarray): The upper control point of the tone curve.
  1428. **params (Any): Additional parameters (not used in this transform).
  1429. Returns:
  1430. np.ndarray: The image with the applied tone curve.
  1431. """
  1432. return fpixel.move_tone_curve(img, low_y, high_y)
  1433. def get_params_dependent_on_data(
  1434. self,
  1435. params: dict[str, Any],
  1436. data: dict[str, Any],
  1437. ) -> dict[str, Any]:
  1438. """Generate parameters dependent on the input data.
  1439. Args:
  1440. params (dict[str, Any]): Parameters from the previous transform.
  1441. data (dict[str, Any]): Input data.
  1442. Returns:
  1443. dict[str, Any]: Dictionary with the following keys:
  1444. - "low_y" (float | np.ndarray): The lower control point of the tone curve.
  1445. - "high_y" (float | np.ndarray): The upper control point of the tone curve.
  1446. """
  1447. image = data["image"] if "image" in data else data["images"][0]
  1448. num_channels = get_num_channels(image)
  1449. if self.per_channel and num_channels != 1:
  1450. return {
  1451. "low_y": np.clip(
  1452. self.random_generator.normal(
  1453. loc=0.25,
  1454. scale=self.scale,
  1455. size=(num_channels,),
  1456. ),
  1457. 0,
  1458. 1,
  1459. ),
  1460. "high_y": np.clip(
  1461. self.random_generator.normal(
  1462. loc=0.75,
  1463. scale=self.scale,
  1464. size=(num_channels,),
  1465. ),
  1466. 0,
  1467. 1,
  1468. ),
  1469. }
  1470. # Same values for all channels
  1471. low_y = np.clip(self.random_generator.normal(loc=0.25, scale=self.scale), 0, 1)
  1472. high_y = np.clip(self.random_generator.normal(loc=0.75, scale=self.scale), 0, 1)
  1473. return {"low_y": low_y, "high_y": high_y}
  1474. class HueSaturationValue(ImageOnlyTransform):
  1475. """Randomly change hue, saturation and value of the input image.
  1476. This transform adjusts the HSV (Hue, Saturation, Value) channels of an input RGB image.
  1477. It allows for independent control over each channel, providing a wide range of color
  1478. and brightness modifications.
  1479. Args:
  1480. hue_shift_limit (float | tuple[float, float]): Range for changing hue.
  1481. If a single float value is provided, the range will be (-hue_shift_limit, hue_shift_limit).
  1482. Values should be in the range [-180, 180]. Default: (-20, 20).
  1483. sat_shift_limit (float | tuple[float, float]): Range for changing saturation.
  1484. If a single float value is provided, the range will be (-sat_shift_limit, sat_shift_limit).
  1485. Values should be in the range [-255, 255]. Default: (-30, 30).
  1486. val_shift_limit (float | tuple[float, float]): Range for changing value (brightness).
  1487. If a single float value is provided, the range will be (-val_shift_limit, val_shift_limit).
  1488. Values should be in the range [-255, 255]. Default: (-20, 20).
  1489. p (float): Probability of applying the transform. Default: 0.5.
  1490. Targets:
  1491. image
  1492. Image types:
  1493. uint8, float32
  1494. Number of channels:
  1495. 3
  1496. Note:
  1497. - The transform first converts the input RGB image to the HSV color space.
  1498. - Each channel (Hue, Saturation, Value) is adjusted independently.
  1499. - Hue is circular, so it wraps around at 180 degrees.
  1500. - For float32 images, the shift values are applied as percentages of the full range.
  1501. - This transform is particularly useful for color augmentation and simulating
  1502. different lighting conditions.
  1503. Examples:
  1504. >>> import numpy as np
  1505. >>> import albumentations as A
  1506. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1507. >>> transform = A.HueSaturationValue(
  1508. ... hue_shift_limit=20,
  1509. ... sat_shift_limit=30,
  1510. ... val_shift_limit=20,
  1511. ... p=0.7
  1512. ... )
  1513. >>> result = transform(image=image)
  1514. >>> augmented_image = result["image"]
  1515. References:
  1516. HSV color space: https://en.wikipedia.org/wiki/HSL_and_HSV
  1517. """
  1518. class InitSchema(BaseTransformInitSchema):
  1519. hue_shift_limit: SymmetricRangeType
  1520. sat_shift_limit: SymmetricRangeType
  1521. val_shift_limit: SymmetricRangeType
  1522. def __init__(
  1523. self,
  1524. hue_shift_limit: tuple[float, float] | float = (-20, 20),
  1525. sat_shift_limit: tuple[float, float] | float = (-30, 30),
  1526. val_shift_limit: tuple[float, float] | float = (-20, 20),
  1527. p: float = 0.5,
  1528. ):
  1529. super().__init__(p=p)
  1530. self.hue_shift_limit = cast("tuple[float, float]", hue_shift_limit)
  1531. self.sat_shift_limit = cast("tuple[float, float]", sat_shift_limit)
  1532. self.val_shift_limit = cast("tuple[float, float]", val_shift_limit)
  1533. def apply(
  1534. self,
  1535. img: np.ndarray,
  1536. hue_shift: int,
  1537. sat_shift: int,
  1538. val_shift: int,
  1539. **params: Any,
  1540. ) -> np.ndarray:
  1541. """Apply the hue, saturation, and value shifts to the input image.
  1542. Args:
  1543. img (np.ndarray): The input image to apply the hue, saturation, and value shifts to.
  1544. hue_shift (int): The hue shift value.
  1545. sat_shift (int): The saturation shift value.
  1546. val_shift (int): The value (brightness) shift value.
  1547. **params (Any): Additional parameters (not used in this transform).
  1548. Returns:
  1549. np.ndarray: The image with the applied hue, saturation, and value shifts.
  1550. """
  1551. if not is_rgb_image(img) and not is_grayscale_image(img):
  1552. msg = "HueSaturationValue transformation expects 1-channel or 3-channel images."
  1553. raise TypeError(msg)
  1554. return fpixel.shift_hsv(img, hue_shift, sat_shift, val_shift)
  1555. def get_params(self) -> dict[str, float]:
  1556. """Generate parameters dependent on the input data.
  1557. Returns:
  1558. dict[str, float]: Dictionary with the following keys:
  1559. - "hue_shift" (float): The hue shift value.
  1560. - "sat_shift" (float): The saturation shift value.
  1561. - "val_shift" (float): The value (brightness) shift value.
  1562. """
  1563. return {
  1564. "hue_shift": self.py_random.uniform(*self.hue_shift_limit),
  1565. "sat_shift": self.py_random.uniform(*self.sat_shift_limit),
  1566. "val_shift": self.py_random.uniform(*self.val_shift_limit),
  1567. }
  1568. class Solarize(ImageOnlyTransform):
  1569. """Invert all pixel values above a threshold.
  1570. This transform applies a solarization effect to the input image. Solarization is a phenomenon in
  1571. photography in which the image recorded on a negative or on a photographic print is wholly or
  1572. partially reversed in tone. Dark areas appear light or light areas appear dark.
  1573. In this implementation, all pixel values above a threshold are inverted.
  1574. Args:
  1575. threshold_range (tuple[float, float]): Range for solarizing threshold as a fraction
  1576. of maximum value. The threshold_range should be in the range [0, 1] and will be multiplied by the
  1577. maximum value of the image type (255 for uint8 images or 1.0 for float images).
  1578. Default: (0.5, 0.5) (corresponds to 127.5 for uint8 and 0.5 for float32).
  1579. p (float): Probability of applying the transform. Default: 0.5.
  1580. Targets:
  1581. image
  1582. Image types:
  1583. uint8, float32
  1584. Number of channels:
  1585. Any
  1586. Note:
  1587. - For uint8 images, pixel values above the threshold are inverted as: 255 - pixel_value
  1588. - For float32 images, pixel values above the threshold are inverted as: 1.0 - pixel_value
  1589. - The threshold is applied to each channel independently
  1590. - The threshold is calculated in two steps:
  1591. 1. Sample a value from threshold_range
  1592. 2. Multiply by the image's maximum value:
  1593. * For uint8: threshold = sampled_value * 255
  1594. * For float32: threshold = sampled_value * 1.0
  1595. - This transform can create interesting artistic effects or be used for data augmentation
  1596. Examples:
  1597. >>> import numpy as np
  1598. >>> import albumentations as A
  1599. >>>
  1600. # Solarize uint8 image with fixed threshold at 50% of max value (127.5)
  1601. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1602. >>> transform = A.Solarize(threshold_range=(0.5, 0.5), p=1.0)
  1603. >>> solarized_image = transform(image=image)['image']
  1604. >>>
  1605. # Solarize uint8 image with random threshold between 40-60% of max value (102-153)
  1606. >>> transform = A.Solarize(threshold_range=(0.4, 0.6), p=1.0)
  1607. >>> solarized_image = transform(image=image)['image']
  1608. >>>
  1609. # Solarize float32 image at 50% of max value (0.5)
  1610. >>> image = np.random.rand(100, 100, 3).astype(np.float32)
  1611. >>> transform = A.Solarize(threshold_range=(0.5, 0.5), p=1.0)
  1612. >>> solarized_image = transform(image=image)['image']
  1613. Mathematical Formulation:
  1614. Let f be a value sampled from threshold_range (min, max).
  1615. For each pixel value p:
  1616. threshold = f * max_value
  1617. if p > threshold:
  1618. p_new = max_value - p
  1619. else:
  1620. p_new = p
  1621. Where max_value is 255 for uint8 images and 1.0 for float32 images.
  1622. See Also:
  1623. Invert: For inverting all pixel values regardless of a threshold.
  1624. """
  1625. class InitSchema(BaseTransformInitSchema):
  1626. threshold_range: Annotated[
  1627. tuple[float, float],
  1628. AfterValidator(check_range_bounds(0, 1)),
  1629. AfterValidator(nondecreasing),
  1630. ]
  1631. def __init__(
  1632. self,
  1633. threshold_range: tuple[float, float] = (0.5, 0.5),
  1634. p: float = 0.5,
  1635. ):
  1636. super().__init__(p=p)
  1637. self.threshold_range = threshold_range
  1638. def apply(self, img: np.ndarray, threshold: float, **params: Any) -> np.ndarray:
  1639. """Apply the solarize effect to the input image.
  1640. Args:
  1641. img (np.ndarray): The input image to apply the solarize effect to.
  1642. threshold (float): The threshold value.
  1643. **params (Any): Additional parameters (not used in this transform).
  1644. Returns:
  1645. np.ndarray: The image with the applied solarize effect.
  1646. """
  1647. return fpixel.solarize(img, threshold)
  1648. def get_params(self) -> dict[str, float]:
  1649. """Generate parameters dependent on the input data.
  1650. Returns:
  1651. dict[str, float]: Dictionary with the following key:
  1652. - "threshold" (float): The threshold value.
  1653. """
  1654. return {"threshold": self.py_random.uniform(*self.threshold_range)}
  1655. class Posterize(ImageOnlyTransform):
  1656. """Reduces the number of bits for each color channel in the image.
  1657. This transform applies color posterization, a technique that reduces the number of distinct
  1658. colors used in an image. It works by lowering the number of bits used to represent each
  1659. color channel, effectively creating a "poster-like" effect with fewer color gradations.
  1660. Args:
  1661. num_bits (int | tuple[int, int] | list[int] | list[tuple[int, int]]):
  1662. Defines the number of bits to keep for each color channel. Can be specified in several ways:
  1663. - Single int: Same number of bits for all channels. Range: [1, 7].
  1664. - tuple of two ints: (min_bits, max_bits) to randomly choose from. Range for each: [1, 7].
  1665. - list of three ints: Specific number of bits for each channel [r_bits, g_bits, b_bits].
  1666. - list of three tuples: Ranges for each channel [(r_min, r_max), (g_min, g_max), (b_min, b_max)].
  1667. Default: 4
  1668. p (float): Probability of applying the transform. Default: 0.5.
  1669. Targets:
  1670. image
  1671. Image types:
  1672. uint8, float32
  1673. Number of channels:
  1674. Any
  1675. Note:
  1676. - The effect becomes more pronounced as the number of bits is reduced.
  1677. - This transform can create interesting artistic effects or be used for image compression simulation.
  1678. - Posterization is particularly useful for:
  1679. * Creating stylized or retro-looking images
  1680. * Reducing the color palette for specific artistic effects
  1681. * Simulating the look of older or lower-quality digital images
  1682. * Data augmentation in scenarios where color depth might vary
  1683. Mathematical Background:
  1684. For an 8-bit color channel, posterization to n bits can be expressed as:
  1685. new_value = (old_value >> (8 - n)) << (8 - n)
  1686. This operation keeps the n most significant bits and sets the rest to zero.
  1687. Examples:
  1688. >>> import numpy as np
  1689. >>> import albumentations as A
  1690. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  1691. # Posterize all channels to 3 bits
  1692. >>> transform = A.Posterize(num_bits=3, p=1.0)
  1693. >>> posterized_image = transform(image=image)["image"]
  1694. # Randomly posterize between 2 and 5 bits
  1695. >>> transform = A.Posterize(num_bits=(2, 5), p=1.0)
  1696. >>> posterized_image = transform(image=image)["image"]
  1697. # Different bits for each channel
  1698. >>> transform = A.Posterize(num_bits=[3, 5, 2], p=1.0)
  1699. >>> posterized_image = transform(image=image)["image"]
  1700. # Range of bits for each channel
  1701. >>> transform = A.Posterize(num_bits=[(1, 3), (3, 5), (2, 4)], p=1.0)
  1702. >>> posterized_image = transform(image=image)["image"]
  1703. References:
  1704. - Color Quantization: https://en.wikipedia.org/wiki/Color_quantization
  1705. - Posterization: https://en.wikipedia.org/wiki/Posterization
  1706. """
  1707. class InitSchema(BaseTransformInitSchema):
  1708. num_bits: int | tuple[int, int] | list[tuple[int, int]]
  1709. @field_validator("num_bits")
  1710. @classmethod
  1711. def _validate_num_bits(
  1712. cls,
  1713. num_bits: Any,
  1714. ) -> tuple[int, int] | list[tuple[int, int]]:
  1715. if isinstance(num_bits, int):
  1716. if num_bits < 1 or num_bits > SEVEN:
  1717. raise ValueError("num_bits must be in the range [1, 7]")
  1718. return (num_bits, num_bits)
  1719. if isinstance(num_bits, Sequence) and len(num_bits) > PAIR:
  1720. return [to_tuple(i, i) for i in num_bits]
  1721. return to_tuple(num_bits, num_bits)
  1722. def __init__(
  1723. self,
  1724. num_bits: int | tuple[int, int] | list[tuple[int, int]] = 4,
  1725. p: float = 0.5,
  1726. ):
  1727. super().__init__(p=p)
  1728. self.num_bits = cast("Union[tuple[int, int], list[tuple[int, int]]]", num_bits)
  1729. def apply(
  1730. self,
  1731. img: np.ndarray,
  1732. num_bits: Literal[1, 2, 3, 4, 5, 6, 7] | list[Literal[1, 2, 3, 4, 5, 6, 7]],
  1733. **params: Any,
  1734. ) -> np.ndarray:
  1735. """Apply the posterize effect to the input image.
  1736. Args:
  1737. img (np.ndarray): The input image to apply the posterize effect to.
  1738. num_bits (Literal[1, 2, 3, 4, 5, 6, 7] | list[Literal[1, 2, 3, 4, 5, 6, 7]]):
  1739. The number of bits to keep for each color channel.
  1740. **params (Any): Additional parameters (not used in this transform).
  1741. Returns:
  1742. np.ndarray: The image with the applied posterize effect.
  1743. """
  1744. return fpixel.posterize(img, num_bits)
  1745. def get_params(self) -> dict[str, Any]:
  1746. """Generate parameters dependent on the input data.
  1747. Returns:
  1748. dict[str, Any]: Dictionary with the following key:
  1749. - "num_bits" (Literal[1, 2, 3, 4, 5, 6, 7] | list[Literal[1, 2, 3, 4, 5, 6, 7]]):
  1750. The number of bits to keep for each color channel.
  1751. """
  1752. if isinstance(self.num_bits, list):
  1753. num_bits = [self.py_random.randint(*i) for i in self.num_bits]
  1754. return {"num_bits": num_bits}
  1755. return {"num_bits": self.py_random.randint(*self.num_bits)}
  1756. class Equalize(ImageOnlyTransform):
  1757. """Equalize the image histogram.
  1758. This transform applies histogram equalization to the input image. Histogram equalization
  1759. is a method in image processing of contrast adjustment using the image's histogram.
  1760. Args:
  1761. mode (Literal['cv', 'pil']): Use OpenCV or Pillow equalization method.
  1762. Default: 'cv'
  1763. by_channels (bool): If True, use equalization by channels separately,
  1764. else convert image to YCbCr representation and use equalization by `Y` channel.
  1765. Default: True
  1766. mask (np.ndarray, callable): If given, only the pixels selected by
  1767. the mask are included in the analysis. Can be:
  1768. - A 1-channel or 3-channel numpy array of the same size as the input image.
  1769. - A callable (function) that generates a mask. The function should accept 'image'
  1770. as its first argument, and can accept additional arguments specified in mask_params.
  1771. Default: None
  1772. mask_params (list[str]): Additional parameters to pass to the mask function.
  1773. These parameters will be taken from the data dict passed to __call__.
  1774. Default: ()
  1775. p (float): Probability of applying the transform. Default: 0.5.
  1776. Targets:
  1777. image, volume
  1778. Image types:
  1779. uint8, float32
  1780. Number of channels:
  1781. 1,3
  1782. Note:
  1783. - When mode='cv', OpenCV's equalizeHist() function is used.
  1784. - When mode='pil', Pillow's equalize() function is used.
  1785. - The 'by_channels' parameter determines whether equalization is applied to each color channel
  1786. independently (True) or to the luminance channel only (False).
  1787. - If a mask is provided as a numpy array, it should have the same height and width as the input image.
  1788. - If a mask is provided as a function, it allows for dynamic mask generation based on the input image
  1789. and additional parameters. This is useful for scenarios where the mask depends on the image content
  1790. or external data (e.g., bounding boxes, segmentation masks).
  1791. Mask Function:
  1792. When mask is a callable, it should have the following signature:
  1793. mask_func(image, *args) -> np.ndarray
  1794. - image: The input image (numpy array)
  1795. - *args: Additional arguments as specified in mask_params
  1796. The function should return a numpy array of the same height and width as the input image,
  1797. where non-zero pixels indicate areas to be equalized.
  1798. Examples:
  1799. >>> import numpy as np
  1800. >>> import albumentations as A
  1801. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  1802. >>>
  1803. >>> # Using a static mask
  1804. >>> mask = np.random.randint(0, 2, (100, 100), dtype=np.uint8)
  1805. >>> transform = A.Equalize(mask=mask, p=1.0)
  1806. >>> result = transform(image=image)
  1807. >>>
  1808. >>> # Using a dynamic mask function
  1809. >>> def mask_func(image, bboxes):
  1810. ... mask = np.ones_like(image[:, :, 0], dtype=np.uint8)
  1811. ... for bbox in bboxes:
  1812. ... x1, y1, x2, y2 = map(int, bbox)
  1813. ... mask[y1:y2, x1:x2] = 0 # Exclude areas inside bounding boxes
  1814. ... return mask
  1815. >>>
  1816. >>> transform = A.Equalize(mask=mask_func, mask_params=['bboxes'], p=1.0)
  1817. >>> bboxes = [(10, 10, 50, 50), (60, 60, 90, 90)] # Example bounding boxes
  1818. >>> result = transform(image=image, bboxes=bboxes)
  1819. References:
  1820. - OpenCV equalizeHist: https://docs.opencv.org/3.4/d6/dc7/group__imgproc__hist.html#ga7e54091f0c937d49bf84152a16f76d6e
  1821. - Pillow ImageOps.equalize: https://pillow.readthedocs.io/en/stable/reference/ImageOps.html#PIL.ImageOps.equalize
  1822. - Histogram Equalization: https://en.wikipedia.org/wiki/Histogram_equalization
  1823. """
  1824. class InitSchema(BaseTransformInitSchema):
  1825. mode: Literal["cv", "pil"]
  1826. by_channels: bool
  1827. mask: np.ndarray | Callable[..., Any] | None
  1828. mask_params: Sequence[str]
  1829. def __init__(
  1830. self,
  1831. mode: Literal["cv", "pil"] = "cv",
  1832. by_channels: bool = True,
  1833. mask: np.ndarray | Callable[..., Any] | None = None,
  1834. mask_params: Sequence[str] = (),
  1835. p: float = 0.5,
  1836. ):
  1837. super().__init__(p=p)
  1838. self.mode = mode
  1839. self.by_channels = by_channels
  1840. self.mask = mask
  1841. self.mask_params = mask_params
  1842. def apply(self, img: np.ndarray, mask: np.ndarray, **params: Any) -> np.ndarray:
  1843. """Apply the equalization effect to the input image.
  1844. Args:
  1845. img (np.ndarray): The input image to apply the equalization effect to.
  1846. mask (np.ndarray): The mask to apply the equalization effect to.
  1847. **params (Any): Additional parameters (not used in this transform).
  1848. Returns:
  1849. np.ndarray: The image with the applied equalization effect.
  1850. """
  1851. if not is_rgb_image(img) and not is_grayscale_image(img):
  1852. raise ValueError("Equalize transform is only supported for RGB and grayscale images.")
  1853. return fpixel.equalize(
  1854. img,
  1855. mode=self.mode,
  1856. by_channels=self.by_channels,
  1857. mask=mask,
  1858. )
  1859. def get_params_dependent_on_data(
  1860. self,
  1861. params: dict[str, Any],
  1862. data: dict[str, Any],
  1863. ) -> dict[str, Any]:
  1864. """Generate parameters dependent on the input data.
  1865. Args:
  1866. params (dict[str, Any]): Parameters from the previous transform.
  1867. data (dict[str, Any]): Input data.
  1868. Returns:
  1869. dict[str, Any]: Dictionary with the following key:
  1870. - "mask" (np.ndarray): The mask to apply the equalization effect to.
  1871. """
  1872. if not callable(self.mask):
  1873. return {"mask": self.mask}
  1874. mask_params = {"image": data["image"]}
  1875. for key in self.mask_params:
  1876. if key not in data:
  1877. raise KeyError(
  1878. f"Required parameter '{key}' for mask function is missing in data.",
  1879. )
  1880. mask_params[key] = data[key]
  1881. return {"mask": self.mask(**mask_params)}
  1882. @property
  1883. def targets_as_params(self) -> list[str]:
  1884. """Return the list of parameters that are used for generating the mask.
  1885. Returns:
  1886. list[str]: List of parameter names.
  1887. """
  1888. return [*list(self.mask_params)]
  1889. class RandomBrightnessContrast(ImageOnlyTransform):
  1890. """Randomly changes the brightness and contrast of the input image.
  1891. This transform adjusts the brightness and contrast of an image simultaneously, allowing for
  1892. a wide range of lighting and contrast variations. It's particularly useful for data augmentation
  1893. in computer vision tasks, helping models become more robust to different lighting conditions.
  1894. Args:
  1895. brightness_limit (float | tuple[float, float]): Factor range for changing brightness.
  1896. If a single float value is provided, the range will be (-brightness_limit, brightness_limit).
  1897. Values should typically be in the range [-1.0, 1.0], where 0 means no change,
  1898. 1.0 means maximum brightness, and -1.0 means minimum brightness.
  1899. Default: (-0.2, 0.2).
  1900. contrast_limit (float | tuple[float, float]): Factor range for changing contrast.
  1901. If a single float value is provided, the range will be (-contrast_limit, contrast_limit).
  1902. Values should typically be in the range [-1.0, 1.0], where 0 means no change,
  1903. 1.0 means maximum increase in contrast, and -1.0 means maximum decrease in contrast.
  1904. Default: (-0.2, 0.2).
  1905. brightness_by_max (bool): If True, adjusts brightness by scaling pixel values up to the
  1906. maximum value of the image's dtype. If False, uses the mean pixel value for adjustment.
  1907. Default: True.
  1908. ensure_safe_range (bool): If True, adjusts alpha and beta to prevent overflow/underflow.
  1909. This ensures output values stay within the valid range for the image dtype without clipping.
  1910. Default: False.
  1911. p (float): Probability of applying the transform. Default: 0.5.
  1912. Targets:
  1913. image, volume
  1914. Image types:
  1915. uint8, float32
  1916. Number of channels:
  1917. Any
  1918. Note:
  1919. - The order of operation is: contrast adjustment, then brightness adjustment.
  1920. - For uint8 images, the output is clipped to [0, 255] range.
  1921. - For float32 images, the output is clipped to [0, 1] range.
  1922. - The `brightness_by_max` parameter affects how brightness is adjusted:
  1923. * If True, brightness adjustment is more pronounced and can lead to more saturated results.
  1924. * If False, brightness adjustment is more subtle and preserves the overall lighting better.
  1925. - This transform is useful for:
  1926. * Simulating different lighting conditions
  1927. * Enhancing low-light or overexposed images
  1928. * Data augmentation to improve model robustness
  1929. Mathematical Formulation:
  1930. Let a be the contrast adjustment factor and β be the brightness adjustment factor.
  1931. For each pixel value x:
  1932. 1. Contrast adjustment: x' = clip((x - mean) * (1 + a) + mean)
  1933. 2. Brightness adjustment:
  1934. If brightness_by_max is True: x'' = clip(x' * (1 + β))
  1935. If brightness_by_max is False: x'' = clip(x' + β * max_value)
  1936. Where clip() ensures values stay within the valid range for the image dtype.
  1937. Examples:
  1938. >>> import numpy as np
  1939. >>> import albumentations as A
  1940. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  1941. # Default usage
  1942. >>> transform = A.RandomBrightnessContrast(p=1.0)
  1943. >>> augmented_image = transform(image=image)["image"]
  1944. # Custom brightness and contrast limits
  1945. >>> transform = A.RandomBrightnessContrast(
  1946. ... brightness_limit=0.3,
  1947. ... contrast_limit=0.3,
  1948. ... p=1.0
  1949. ... )
  1950. >>> augmented_image = transform(image=image)["image"]
  1951. # Adjust brightness based on mean value
  1952. >>> transform = A.RandomBrightnessContrast(
  1953. ... brightness_limit=0.2,
  1954. ... contrast_limit=0.2,
  1955. ... brightness_by_max=False,
  1956. ... p=1.0
  1957. ... )
  1958. >>> augmented_image = transform(image=image)["image"]
  1959. References:
  1960. - Brightness: https://en.wikipedia.org/wiki/Brightness
  1961. - Contrast: https://en.wikipedia.org/wiki/Contrast_(vision)
  1962. """
  1963. class InitSchema(BaseTransformInitSchema):
  1964. brightness_limit: SymmetricRangeType
  1965. contrast_limit: SymmetricRangeType
  1966. brightness_by_max: bool
  1967. ensure_safe_range: bool
  1968. def __init__(
  1969. self,
  1970. brightness_limit: tuple[float, float] | float = (-0.2, 0.2),
  1971. contrast_limit: tuple[float, float] | float = (-0.2, 0.2),
  1972. brightness_by_max: bool = True,
  1973. ensure_safe_range: bool = False,
  1974. p: float = 0.5,
  1975. ):
  1976. super().__init__(p=p)
  1977. self.brightness_limit = cast("tuple[float, float]", brightness_limit)
  1978. self.contrast_limit = cast("tuple[float, float]", contrast_limit)
  1979. self.brightness_by_max = brightness_by_max
  1980. self.ensure_safe_range = ensure_safe_range
  1981. def apply(
  1982. self,
  1983. img: np.ndarray,
  1984. alpha: float,
  1985. beta: float,
  1986. **params: Any,
  1987. ) -> np.ndarray:
  1988. """Apply the brightness and contrast adjustment to the input image.
  1989. Args:
  1990. img (np.ndarray): The input image to apply the brightness and contrast adjustment to.
  1991. alpha (float): The contrast adjustment factor.
  1992. beta (float): The brightness adjustment factor.
  1993. **params (Any): Additional parameters (not used in this transform).
  1994. Returns:
  1995. np.ndarray: The image with the applied brightness and contrast adjustment.
  1996. """
  1997. return albucore.multiply_add(img, alpha, beta, inplace=False)
  1998. def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  1999. """Apply the brightness and contrast adjustment to a batch of images.
  2000. Args:
  2001. images (np.ndarray): The batch of images to apply the brightness and contrast adjustment to.
  2002. *args (Any): Additional arguments.
  2003. **params (Any): Additional parameters (not used in this transform).
  2004. Returns:
  2005. np.ndarray: The batch of images with the applied brightness and contrast adjustment.
  2006. """
  2007. return self.apply(images, *args, **params)
  2008. def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  2009. """Apply the brightness and contrast adjustment to a batch of volumes.
  2010. Args:
  2011. volumes (np.ndarray): The batch of volumes to apply the brightness and contrast adjustment to.
  2012. *args (Any): Additional arguments.
  2013. **params (Any): Additional parameters (not used in this transform).
  2014. Returns:
  2015. np.ndarray: The batch of volumes with the applied brightness and contrast adjustment.
  2016. """
  2017. return self.apply(volumes, *args, **params)
  2018. def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  2019. """Apply the brightness and contrast adjustment to a single volume.
  2020. Args:
  2021. volume (np.ndarray): The volume to apply the brightness and contrast adjustment to.
  2022. *args (Any): Additional arguments.
  2023. **params (Any): Additional parameters (not used in this transform).
  2024. Returns:
  2025. np.ndarray: The volume with the applied brightness and contrast adjustment.
  2026. """
  2027. return self.apply(volume, *args, **params)
  2028. def get_params_dependent_on_data(
  2029. self,
  2030. params: dict[str, Any],
  2031. data: dict[str, Any],
  2032. ) -> dict[str, float]:
  2033. """Generate parameters dependent on the input data.
  2034. Args:
  2035. params (dict[str, Any]): Parameters from the previous transform.
  2036. data (dict[str, Any]): Input data.
  2037. Returns:
  2038. dict[str, float]: Dictionary with the following keys:
  2039. - "alpha" (float): The contrast adjustment factor.
  2040. - "beta" (float): The brightness adjustment factor.
  2041. """
  2042. image = data["image"] if "image" in data else data["images"][0]
  2043. # Sample initial values
  2044. alpha = 1.0 + self.py_random.uniform(*self.contrast_limit)
  2045. beta = self.py_random.uniform(*self.brightness_limit)
  2046. max_value = MAX_VALUES_BY_DTYPE[image.dtype]
  2047. # Scale beta according to brightness_by_max setting
  2048. beta = beta * max_value if self.brightness_by_max else beta * np.mean(image)
  2049. # Clip values to safe ranges if needed
  2050. if self.ensure_safe_range:
  2051. alpha, beta = fpixel.get_safe_brightness_contrast_params(
  2052. alpha,
  2053. beta,
  2054. max_value,
  2055. )
  2056. return {
  2057. "alpha": alpha,
  2058. "beta": beta,
  2059. }
  2060. class GaussNoise(ImageOnlyTransform):
  2061. """Apply Gaussian noise to the input image.
  2062. Args:
  2063. std_range (tuple[float, float]): Range for noise standard deviation as a fraction
  2064. of the maximum value (255 for uint8 images or 1.0 for float images).
  2065. Values should be in range [0, 1]. Default: (0.2, 0.44).
  2066. mean_range (tuple[float, float]): Range for noise mean as a fraction
  2067. of the maximum value (255 for uint8 images or 1.0 for float images).
  2068. Values should be in range [-1, 1]. Default: (0.0, 0.0).
  2069. per_channel (bool): If True, noise will be sampled for each channel independently.
  2070. Otherwise, the noise will be sampled once for all channels. Default: True.
  2071. noise_scale_factor (float): Scaling factor for noise generation. Value should be in the range (0, 1].
  2072. When set to 1, noise is sampled for each pixel independently. If less, noise is sampled for a smaller size
  2073. and resized to fit the shape of the image. Smaller values make the transform faster. Default: 1.0.
  2074. p (float): Probability of applying the transform. Default: 0.5.
  2075. Targets:
  2076. image, volume
  2077. Image types:
  2078. uint8, float32
  2079. Number of channels:
  2080. Any
  2081. Note:
  2082. - The noise parameters (std_range and mean_range) are normalized to [0, 1] range:
  2083. * For uint8 images, they are multiplied by 255
  2084. * For float32 images, they are used directly
  2085. - Setting per_channel=False is faster but applies the same noise to all channels
  2086. - The noise_scale_factor parameter allows for a trade-off between transform speed and noise granularity
  2087. Examples:
  2088. >>> import numpy as np
  2089. >>> import albumentations as A
  2090. >>> image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
  2091. >>>
  2092. >>> # Apply Gaussian noise with normalized std_range
  2093. >>> transform = A.GaussNoise(std_range=(0.1, 0.2), p=1.0) # 10-20% of max value
  2094. >>> noisy_image = transform(image=image)['image']
  2095. """
  2096. class InitSchema(BaseTransformInitSchema):
  2097. std_range: Annotated[
  2098. tuple[float, float],
  2099. AfterValidator(check_range_bounds(0, 1)),
  2100. AfterValidator(nondecreasing),
  2101. ]
  2102. mean_range: Annotated[
  2103. tuple[float, float],
  2104. AfterValidator(check_range_bounds(-1, 1)),
  2105. AfterValidator(nondecreasing),
  2106. ]
  2107. per_channel: bool
  2108. noise_scale_factor: float = Field(gt=0, le=1)
  2109. def __init__(
  2110. self,
  2111. std_range: tuple[float, float] = (0.2, 0.44), # sqrt(10 / 255), sqrt(50 / 255)
  2112. mean_range: tuple[float, float] = (0.0, 0.0),
  2113. per_channel: bool = True,
  2114. noise_scale_factor: float = 1,
  2115. p: float = 0.5,
  2116. ):
  2117. super().__init__(p=p)
  2118. self.std_range = std_range
  2119. self.mean_range = mean_range
  2120. self.per_channel = per_channel
  2121. self.noise_scale_factor = noise_scale_factor
  2122. def apply(
  2123. self,
  2124. img: np.ndarray,
  2125. noise_map: np.ndarray,
  2126. **params: Any,
  2127. ) -> np.ndarray:
  2128. """Apply the Gaussian noise to the input image.
  2129. Args:
  2130. img (np.ndarray): The input image to apply the Gaussian noise to.
  2131. noise_map (np.ndarray): The noise map to apply to the image.
  2132. **params (Any): Additional parameters (not used in this transform).
  2133. Returns:
  2134. np.ndarray: The image with the applied Gaussian noise.
  2135. """
  2136. return fpixel.add_noise(img, noise_map)
  2137. def get_params_dependent_on_data(
  2138. self,
  2139. params: dict[str, Any],
  2140. data: dict[str, Any],
  2141. ) -> dict[str, float]:
  2142. """Generate parameters dependent on the input data.
  2143. Args:
  2144. params (dict[str, Any]): Parameters from the previous transform.
  2145. data (dict[str, Any]): Input data.
  2146. Returns:
  2147. dict[str, float]: Dictionary with the following key:
  2148. - "noise_map" (np.ndarray): The noise map to apply to the image.
  2149. """
  2150. image = data["image"] if "image" in data else data["images"][0]
  2151. max_value = MAX_VALUES_BY_DTYPE[image.dtype]
  2152. sigma = self.py_random.uniform(*self.std_range)
  2153. mean = self.py_random.uniform(*self.mean_range)
  2154. noise_map = fpixel.generate_noise(
  2155. noise_type="gaussian",
  2156. spatial_mode="per_pixel" if self.per_channel else "shared",
  2157. shape=image.shape,
  2158. params={"mean_range": (mean, mean), "std_range": (sigma, sigma)},
  2159. max_value=max_value,
  2160. approximation=self.noise_scale_factor,
  2161. random_generator=self.random_generator,
  2162. )
  2163. return {"noise_map": noise_map}
  2164. class ISONoise(ImageOnlyTransform):
  2165. """Applies camera sensor noise to the input image, simulating high ISO settings.
  2166. This transform adds random noise to an image, mimicking the effect of using high ISO settings
  2167. in digital photography. It simulates two main components of ISO noise:
  2168. 1. Color noise: random shifts in color hue
  2169. 2. Luminance noise: random variations in pixel intensity
  2170. Args:
  2171. color_shift (tuple[float, float]): Range for changing color hue.
  2172. Values should be in the range [0, 1], where 1 represents a full 360° hue rotation.
  2173. Default: (0.01, 0.05)
  2174. intensity (tuple[float, float]): Range for the noise intensity.
  2175. Higher values increase the strength of both color and luminance noise.
  2176. Default: (0.1, 0.5)
  2177. p (float): Probability of applying the transform. Default: 0.5
  2178. Targets:
  2179. image, volume
  2180. Image types:
  2181. uint8, float32
  2182. Number of channels:
  2183. 3
  2184. Note:
  2185. - This transform only works with RGB images. It will raise a TypeError if applied to
  2186. non-RGB images.
  2187. - The color shift is applied in the HSV color space, affecting the hue channel.
  2188. - Luminance noise is added to all channels independently.
  2189. - This transform can be useful for data augmentation in low-light scenarios or when
  2190. training models to be robust against noisy inputs.
  2191. Examples:
  2192. >>> import numpy as np
  2193. >>> import albumentations as A
  2194. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  2195. >>> transform = A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5)
  2196. >>> result = transform(image=image)
  2197. >>> noisy_image = result["image"]
  2198. References:
  2199. ISO noise in digital photography: https://en.wikipedia.org/wiki/Image_noise#In_digital_cameras
  2200. """
  2201. class InitSchema(BaseTransformInitSchema):
  2202. color_shift: Annotated[
  2203. tuple[float, float],
  2204. AfterValidator(check_range_bounds(0, 1)),
  2205. AfterValidator(nondecreasing),
  2206. ]
  2207. intensity: Annotated[
  2208. tuple[float, float],
  2209. AfterValidator(check_range_bounds(0, None)),
  2210. AfterValidator(nondecreasing),
  2211. ]
  2212. def __init__(
  2213. self,
  2214. color_shift: tuple[float, float] = (0.01, 0.05),
  2215. intensity: tuple[float, float] = (0.1, 0.5),
  2216. p: float = 0.5,
  2217. ):
  2218. super().__init__(p=p)
  2219. self.intensity = intensity
  2220. self.color_shift = color_shift
  2221. def apply(
  2222. self,
  2223. img: np.ndarray,
  2224. color_shift: float,
  2225. intensity: float,
  2226. random_seed: int,
  2227. **params: Any,
  2228. ) -> np.ndarray:
  2229. """Apply the ISONoise transform to the input image.
  2230. Args:
  2231. img (np.ndarray): The input image to apply the ISONoise transform to.
  2232. color_shift (float): The color shift value.
  2233. intensity (float): The intensity value.
  2234. random_seed (int): The random seed.
  2235. **params (Any): Additional parameters (not used in this transform).
  2236. Returns:
  2237. np.ndarray: The image with the applied ISONoise transform.
  2238. """
  2239. non_rgb_error(img)
  2240. return fpixel.iso_noise(
  2241. img,
  2242. color_shift,
  2243. intensity,
  2244. np.random.default_rng(random_seed),
  2245. )
  2246. def get_params_dependent_on_data(
  2247. self,
  2248. params: dict[str, Any],
  2249. data: dict[str, Any],
  2250. ) -> dict[str, Any]:
  2251. """Generate parameters dependent on the input data.
  2252. Args:
  2253. params (dict[str, Any]): Parameters from the previous transform.
  2254. data (dict[str, Any]): Input data.
  2255. Returns:
  2256. dict[str, Any]: Dictionary with the following keys:
  2257. - "color_shift" (float): The color shift value.
  2258. - "intensity" (float): The intensity value.
  2259. - "random_seed" (int): The random seed.
  2260. """
  2261. random_seed = self.random_generator.integers(0, 2**32 - 1)
  2262. return {
  2263. "color_shift": self.py_random.uniform(*self.color_shift),
  2264. "intensity": self.py_random.uniform(*self.intensity),
  2265. "random_seed": random_seed,
  2266. }
  2267. class CLAHE(ImageOnlyTransform):
  2268. """Apply Contrast Limited Adaptive Histogram Equalization (CLAHE) to the input image.
  2269. CLAHE is an advanced method of improving the contrast in an image. Unlike regular histogram
  2270. equalization, which operates on the entire image, CLAHE operates on small regions (tiles)
  2271. in the image. This results in a more balanced equalization, preventing over-amplification
  2272. of contrast in areas with initially low contrast.
  2273. Args:
  2274. clip_limit (tuple[float, float] | float): Controls the contrast enhancement limit.
  2275. - If a single float is provided, the range will be (1, clip_limit).
  2276. - If a tuple of two floats is provided, it defines the range for random selection.
  2277. Higher values allow for more contrast enhancement, but may also increase noise.
  2278. Default: (1, 4)
  2279. tile_grid_size (tuple[int, int]): Defines the number of tiles in the row and column directions.
  2280. Format is (rows, columns). Smaller tile sizes can lead to more localized enhancements,
  2281. while larger sizes give results closer to global histogram equalization.
  2282. Default: (8, 8)
  2283. p (float): Probability of applying the transform. Default: 0.5
  2284. Notes:
  2285. - Supports only RGB or grayscale images.
  2286. - For color images, CLAHE is applied to the L channel in the LAB color space.
  2287. - The clip limit determines the maximum slope of the cumulative histogram. A lower
  2288. clip limit will result in more contrast limiting.
  2289. - Tile grid size affects the adaptiveness of the method. More tiles increase local
  2290. adaptiveness but can lead to an unnatural look if set too high.
  2291. Targets:
  2292. image, volume
  2293. Image types:
  2294. uint8, float32
  2295. Number of channels:
  2296. 1, 3
  2297. Examples:
  2298. >>> import numpy as np
  2299. >>> import albumentations as A
  2300. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  2301. >>> transform = A.CLAHE(clip_limit=(1, 4), tile_grid_size=(8, 8), p=1.0)
  2302. >>> result = transform(image=image)
  2303. >>> clahe_image = result["image"]
  2304. References:
  2305. - Tutorial: https://docs.opencv.org/master/d5/daf/tutorial_py_histogram_equalization.html
  2306. - "Contrast Limited Adaptive Histogram Equalization.": https://ieeexplore.ieee.org/document/109340
  2307. """
  2308. class InitSchema(BaseTransformInitSchema):
  2309. clip_limit: OnePlusFloatRangeType
  2310. tile_grid_size: Annotated[tuple[int, int], AfterValidator(check_range_bounds(1, None))]
  2311. def __init__(
  2312. self,
  2313. clip_limit: tuple[float, float] | float = 4.0,
  2314. tile_grid_size: tuple[int, int] = (8, 8),
  2315. p: float = 0.5,
  2316. ):
  2317. super().__init__(p=p)
  2318. self.clip_limit = cast("tuple[float, float]", clip_limit)
  2319. self.tile_grid_size = tile_grid_size
  2320. def apply(self, img: np.ndarray, clip_limit: float, **params: Any) -> np.ndarray:
  2321. """Apply the CLAHE transform to the input image.
  2322. Args:
  2323. img (np.ndarray): The input image to apply the CLAHE transform to.
  2324. clip_limit (float): The contrast enhancement limit.
  2325. **params (Any): Additional parameters (not used in this transform).
  2326. Returns:
  2327. np.ndarray: The image with the applied CLAHE transform.
  2328. """
  2329. if not is_rgb_image(img) and not is_grayscale_image(img):
  2330. msg = "CLAHE transformation expects 1-channel or 3-channel images."
  2331. raise TypeError(msg)
  2332. return fpixel.clahe(img, clip_limit, self.tile_grid_size)
  2333. def get_params(self) -> dict[str, float]:
  2334. """Generate parameters dependent on the input data.
  2335. Returns:
  2336. dict[str, float]: Dictionary with the following key:
  2337. - "clip_limit" (float): The contrast enhancement limit.
  2338. """
  2339. return {"clip_limit": self.py_random.uniform(*self.clip_limit)}
  2340. class ChannelShuffle(ImageOnlyTransform):
  2341. """Randomly rearrange channels of the image.
  2342. Args:
  2343. p (float): Probability of applying the transform. Default: 0.5.
  2344. Targets:
  2345. image
  2346. Number of channels:
  2347. Any
  2348. Image types:
  2349. uint8, float32
  2350. Examples:
  2351. >>> import numpy as np
  2352. >>> import albumentations as A
  2353. >>>
  2354. >>> # Create a sample image with distinct RGB channels
  2355. >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
  2356. >>> # Red channel (first channel)
  2357. >>> image[:, :, 0] = np.linspace(0, 255, 100, dtype=np.uint8).reshape(1, 100)
  2358. >>> # Green channel (second channel)
  2359. >>> image[:, :, 1] = np.linspace(0, 255, 100, dtype=np.uint8).reshape(100, 1)
  2360. >>> # Blue channel (third channel) - constant value
  2361. >>> image[:, :, 2] = 128
  2362. >>>
  2363. >>> # Apply channel shuffle transform
  2364. >>> transform = A.ChannelShuffle(p=1.0)
  2365. >>> result = transform(image=image)
  2366. >>> shuffled_image = result['image']
  2367. >>>
  2368. >>> # The channels have been randomly rearranged
  2369. >>> # For example, the original order [R, G, B] might become [G, B, R] or [B, R, G]
  2370. >>> # This results in a color shift while preserving all the original image data
  2371. >>> # Note: For images with more than 3 channels, all channels are shuffled similarly
  2372. """
  2373. def apply(
  2374. self,
  2375. img: np.ndarray,
  2376. channels_shuffled: list[int] | None,
  2377. **params: Any,
  2378. ) -> np.ndarray:
  2379. """Apply the ChannelShuffle transform to the input image.
  2380. Args:
  2381. img (np.ndarray): The input image to apply the ChannelShuffle transform to.
  2382. channels_shuffled (list[int] | None): The channels to shuffle.
  2383. **params (Any): Additional parameters (not used in this transform).
  2384. Returns:
  2385. np.ndarray: The image with the applied ChannelShuffle transform.
  2386. """
  2387. if channels_shuffled is None:
  2388. return img
  2389. return fpixel.channel_shuffle(img, channels_shuffled)
  2390. def apply_to_images(self, images: np.ndarray, channels_shuffled: list[int] | None, **params: Any) -> np.ndarray:
  2391. """Apply the ChannelShuffle transform to the input images.
  2392. Args:
  2393. images (np.ndarray): The input images to apply the ChannelShuffle transform to.
  2394. channels_shuffled (list[int] | None): The channels to shuffle.
  2395. **params (Any): Additional parameters (not used in this transform).
  2396. Returns:
  2397. np.ndarray: The images with the applied ChannelShuffle transform.
  2398. """
  2399. if channels_shuffled is None:
  2400. return images
  2401. return fpixel.volume_channel_shuffle(images, channels_shuffled)
  2402. def apply_to_volumes(self, volumes: np.ndarray, channels_shuffled: list[int] | None, **params: Any) -> np.ndarray:
  2403. """Apply the ChannelShuffle transform to the input volumes.
  2404. Args:
  2405. volumes (np.ndarray): The input volumes to apply the ChannelShuffle transform to.
  2406. channels_shuffled (list[int] | None): The channels to shuffle.
  2407. **params (Any): Additional parameters (not used in this transform).
  2408. Returns:
  2409. np.ndarray: The volumes with the applied ChannelShuffle transform.
  2410. """
  2411. if channels_shuffled is None:
  2412. return volumes
  2413. return fpixel.volumes_channel_shuffle(volumes, channels_shuffled)
  2414. def apply_to_volume(self, volume: np.ndarray, channels_shuffled: list[int] | None, **params: Any) -> np.ndarray:
  2415. """Apply the ChannelShuffle transform to the input volume.
  2416. Args:
  2417. volume (np.ndarray): The input volume to apply the ChannelShuffle transform to.
  2418. channels_shuffled (list[int] | None): The channels to shuffle.
  2419. **params (Any): Additional parameters (not used in this transform).
  2420. Returns:
  2421. np.ndarray: The volume with the applied ChannelShuffle transform.
  2422. """
  2423. return self.apply_to_images(volume, channels_shuffled, **params)
  2424. def get_params_dependent_on_data(
  2425. self,
  2426. params: dict[str, Any],
  2427. data: dict[str, Any],
  2428. ) -> dict[str, Any]:
  2429. """Generate parameters dependent on the input data.
  2430. Args:
  2431. params (dict[str, Any]): Parameters from the previous transform.
  2432. data (dict[str, Any]): Input data.
  2433. Returns:
  2434. dict[str, Any]: Dictionary with the following key:
  2435. - "channels_shuffled" (tuple[int, ...] | None): The channels to shuffle.
  2436. """
  2437. shape = params["shape"]
  2438. if len(shape) == 2 or shape[-1] == 1:
  2439. return {"channels_shuffled": None}
  2440. ch_arr = list(range(shape[-1]))
  2441. self.py_random.shuffle(ch_arr)
  2442. return {"channels_shuffled": ch_arr}
  2443. class InvertImg(ImageOnlyTransform):
  2444. """Invert the input image by subtracting pixel values from max values of the image types,
  2445. i.e., 255 for uint8 and 1.0 for float32.
  2446. Args:
  2447. p (float): Probability of applying the transform. Default: 0.5.
  2448. Targets:
  2449. image, volume
  2450. Image types:
  2451. uint8, float32
  2452. Number of channels:
  2453. Any
  2454. Examples:
  2455. >>> import numpy as np
  2456. >>> import albumentations as A
  2457. >>> import cv2
  2458. >>>
  2459. >>> # Create a sample image with different elements
  2460. >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
  2461. >>> cv2.circle(image, (30, 30), 20, (255, 255, 255), -1) # White circle
  2462. >>> cv2.rectangle(image, (60, 60), (90, 90), (128, 128, 128), -1) # Gray rectangle
  2463. >>>
  2464. >>> # Apply InvertImg transform
  2465. >>> transform = A.InvertImg(p=1.0)
  2466. >>> result = transform(image=image)
  2467. >>> inverted_image = result['image']
  2468. >>>
  2469. >>> # Result:
  2470. >>> # - Black background becomes white (0 → 255)
  2471. >>> # - White circle becomes black (255 → 0)
  2472. >>> # - Gray rectangle is inverted (128 → 127)
  2473. >>> # The same approach works for float32 images (0-1 range) and grayscale images
  2474. """
  2475. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  2476. """Apply the InvertImg transform to the input image.
  2477. Args:
  2478. img (np.ndarray): The input image to apply the InvertImg transform to.
  2479. **params (Any): Additional parameters (not used in this transform).
  2480. Returns:
  2481. np.ndarray: The image with the applied InvertImg transform.
  2482. """
  2483. return fpixel.invert(img)
  2484. def apply_to_images(self, images: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  2485. """Apply the InvertImg transform to the input images.
  2486. Args:
  2487. images (np.ndarray): The input images to apply the InvertImg transform to.
  2488. *args (Any): Additional arguments (not used in this transform).
  2489. **params (Any): Additional parameters (not used in this transform).
  2490. Returns:
  2491. np.ndarray: The images with the applied InvertImg transform.
  2492. """
  2493. return self.apply(images, *args, **params)
  2494. def apply_to_volumes(self, volumes: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  2495. """Apply the InvertImg transform to the input volumes.
  2496. Args:
  2497. volumes (np.ndarray): The input volumes to apply the InvertImg transform to.
  2498. *args (Any): Additional arguments (not used in this transform).
  2499. **params (Any): Additional parameters (not used in this transform).
  2500. Returns:
  2501. np.ndarray: The volumes with the applied InvertImg transform.
  2502. """
  2503. return self.apply(volumes, *args, **params)
  2504. def apply_to_volume(self, volume: np.ndarray, *args: Any, **params: Any) -> np.ndarray:
  2505. """Apply the InvertImg transform to the input volume.
  2506. Args:
  2507. volume (np.ndarray): The input volume to apply the InvertImg transform to.
  2508. *args (Any): Additional arguments (not used in this transform).
  2509. **params (Any): Additional parameters (not used in this transform).
  2510. Returns:
  2511. np.ndarray: The volume with the applied InvertImg transform.
  2512. """
  2513. return self.apply(volume, *args, **params)
  2514. class RandomGamma(ImageOnlyTransform):
  2515. """Applies random gamma correction to the input image.
  2516. Gamma correction, or simply gamma, is a nonlinear operation used to encode and decode luminance
  2517. or tristimulus values in imaging systems. This transform can adjust the brightness of an image
  2518. while preserving the relative differences between darker and lighter areas, making it useful
  2519. for simulating different lighting conditions or correcting for display characteristics.
  2520. Args:
  2521. gamma_limit (float | tuple[float, float]): If gamma_limit is a single float value, the range
  2522. will be (1, gamma_limit). If it's a tuple of two floats, they will serve as
  2523. the lower and upper bounds for gamma adjustment. Values are in terms of percentage change,
  2524. e.g., (80, 120) means the gamma will be between 80% and 120% of the original.
  2525. Default: (80, 120).
  2526. eps (float): A small value added to the gamma to avoid division by zero or log of zero errors.
  2527. Default: 1e-7.
  2528. p (float): Probability of applying the transform. Default: 0.5.
  2529. Targets:
  2530. image, volume
  2531. Image types:
  2532. uint8, float32
  2533. Number of channels:
  2534. Any
  2535. Note:
  2536. - The gamma correction is applied using the formula: output = input^gamma
  2537. - Gamma values > 1 will make the image darker, while values < 1 will make it brighter
  2538. - This transform is particularly useful for:
  2539. * Simulating different lighting conditions
  2540. * Correcting for non-linear display characteristics
  2541. * Enhancing contrast in certain regions of the image
  2542. * Data augmentation in computer vision tasks
  2543. Mathematical Formulation:
  2544. Let I be the input image and G (gamma) be the correction factor.
  2545. The gamma correction is applied as follows:
  2546. 1. Normalize the image to [0, 1] range: I_norm = I / 255 (for uint8 images)
  2547. 2. Apply gamma correction: I_corrected = I_norm ^ (1 / G)
  2548. 3. Scale back to original range: output = I_corrected * 255 (for uint8 images)
  2549. The actual gamma value used is calculated as:
  2550. G = 1 + (random_value / 100), where random_value is sampled from gamma_limit range.
  2551. Examples:
  2552. >>> import numpy as np
  2553. >>> import albumentations as A
  2554. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  2555. # Default usage
  2556. >>> transform = A.RandomGamma(p=1.0)
  2557. >>> augmented_image = transform(image=image)["image"]
  2558. # Custom gamma range
  2559. >>> transform = A.RandomGamma(gamma_limit=(50, 150), p=1.0)
  2560. >>> augmented_image = transform(image=image)["image"]
  2561. # Applying with other transforms
  2562. >>> transform = A.Compose([
  2563. ... A.RandomGamma(gamma_limit=(80, 120), p=0.5),
  2564. ... A.RandomBrightnessContrast(p=0.5),
  2565. ... ])
  2566. >>> augmented_image = transform(image=image)["image"]
  2567. References:
  2568. - Gamma correction: https://en.wikipedia.org/wiki/Gamma_correction
  2569. - Power law (Gamma) encoding: https://www.cambridgeincolour.com/tutorials/gamma-correction.htm
  2570. """
  2571. class InitSchema(BaseTransformInitSchema):
  2572. gamma_limit: OnePlusFloatRangeType
  2573. def __init__(
  2574. self,
  2575. gamma_limit: tuple[float, float] | float = (80, 120),
  2576. p: float = 0.5,
  2577. ):
  2578. super().__init__(p=p)
  2579. self.gamma_limit = cast("tuple[float, float]", gamma_limit)
  2580. def apply(self, img: np.ndarray, gamma: float, **params: Any) -> np.ndarray:
  2581. """Apply the RandomGamma transform to the input image.
  2582. Args:
  2583. img (np.ndarray): The input image to apply the RandomGamma transform to.
  2584. gamma (float): The gamma value.
  2585. **params (Any): Additional parameters (not used in this transform).
  2586. Returns:
  2587. np.ndarray: The image with the applied RandomGamma transform.
  2588. """
  2589. return fpixel.gamma_transform(img, gamma=gamma)
  2590. def apply_to_volume(self, volume: np.ndarray, gamma: float, **params: Any) -> np.ndarray:
  2591. """Apply the RandomGamma transform to the input volume.
  2592. Args:
  2593. volume (np.ndarray): The input volume to apply the RandomGamma transform to.
  2594. gamma (float): The gamma value.
  2595. **params (Any): Additional parameters (not used in this transform).
  2596. Returns:
  2597. np.ndarray: The volume with the applied RandomGamma transform.
  2598. """
  2599. return self.apply(volume, gamma=gamma)
  2600. def apply_to_volumes(self, volumes: np.ndarray, gamma: float, **params: Any) -> np.ndarray:
  2601. """Apply the RandomGamma transform to the input volumes.
  2602. Args:
  2603. volumes (np.ndarray): The input volumes to apply the RandomGamma transform to.
  2604. gamma (float): The gamma value.
  2605. **params (Any): Additional parameters (not used in this transform).
  2606. Returns:
  2607. np.ndarray: The volumes with the applied RandomGamma transform.
  2608. """
  2609. return self.apply(volumes, gamma=gamma)
  2610. def apply_to_images(self, images: np.ndarray, gamma: float, **params: Any) -> np.ndarray:
  2611. """Apply the RandomGamma transform to the input images.
  2612. Args:
  2613. images (np.ndarray): The input images to apply the RandomGamma transform to.
  2614. gamma (float): The gamma value.
  2615. **params (Any): Additional parameters (not used in this transform).
  2616. Returns:
  2617. np.ndarray: The images with the applied RandomGamma transform.
  2618. """
  2619. return self.apply(images, gamma=gamma)
  2620. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  2621. """Generate parameters dependent on the input data.
  2622. Args:
  2623. params (dict[str, Any]): Parameters from the previous transform.
  2624. data (dict[str, Any]): Input data.
  2625. Returns:
  2626. dict[str, Any]: Dictionary with the following key:
  2627. - "gamma" (float): The gamma value.
  2628. """
  2629. return {
  2630. "gamma": self.py_random.uniform(*self.gamma_limit) / 100.0,
  2631. }
  2632. class ToGray(ImageOnlyTransform):
  2633. """Convert an image to grayscale and optionally replicate the grayscale channel.
  2634. This transform first converts a color image to a single-channel grayscale image using various methods,
  2635. then replicates the grayscale channel if num_output_channels is greater than 1.
  2636. Args:
  2637. num_output_channels (int): The number of channels in the output image. If greater than 1,
  2638. the grayscale channel will be replicated. Default: 3.
  2639. method (Literal["weighted_average", "from_lab", "desaturation", "average", "max", "pca"]):
  2640. The method used for grayscale conversion:
  2641. - "weighted_average": Uses a weighted sum of RGB channels (0.299R + 0.587G + 0.114B).
  2642. Works only with 3-channel images. Provides realistic results based on human perception.
  2643. - "from_lab": Extracts the L channel from the LAB color space.
  2644. Works only with 3-channel images. Gives perceptually uniform results.
  2645. - "desaturation": Averages the maximum and minimum values across channels.
  2646. Works with any number of channels. Fast but may not preserve perceived brightness well.
  2647. - "average": Simple average of all channels.
  2648. Works with any number of channels. Fast but may not give realistic results.
  2649. - "max": Takes the maximum value across all channels.
  2650. Works with any number of channels. Tends to produce brighter results.
  2651. - "pca": Applies Principal Component Analysis to reduce channels.
  2652. Works with any number of channels. Can preserve more information but is computationally intensive.
  2653. p (float): Probability of applying the transform. Default: 0.5.
  2654. Raises:
  2655. TypeError: If the input image doesn't have 3 channels for methods that require it.
  2656. Note:
  2657. - The transform first converts the input image to single-channel grayscale, then replicates
  2658. this channel if num_output_channels > 1.
  2659. - "weighted_average" and "from_lab" are typically used in image processing and computer vision
  2660. applications where accurate representation of human perception is important.
  2661. - "desaturation" and "average" are often used in simple image manipulation tools or when
  2662. computational speed is a priority.
  2663. - "max" method can be useful in scenarios where preserving bright features is important,
  2664. such as in some medical imaging applications.
  2665. - "pca" might be used in advanced image analysis tasks or when dealing with hyperspectral images.
  2666. Image types:
  2667. uint8, float32
  2668. Returns:
  2669. np.ndarray: Grayscale image with the specified number of channels.
  2670. Examples:
  2671. >>> import numpy as np
  2672. >>> import albumentations as A
  2673. >>> import cv2
  2674. >>>
  2675. >>> # Create a sample color image with distinct RGB values
  2676. >>> image = np.zeros((100, 100, 3), dtype=np.uint8)
  2677. >>> # Red square in top-left
  2678. >>> image[10:40, 10:40, 0] = 200
  2679. >>> # Green square in top-right
  2680. >>> image[10:40, 60:90, 1] = 200
  2681. >>> # Blue square in bottom-left
  2682. >>> image[60:90, 10:40, 2] = 200
  2683. >>> # Yellow square in bottom-right (Red + Green)
  2684. >>> image[60:90, 60:90, 0] = 200
  2685. >>> image[60:90, 60:90, 1] = 200
  2686. >>>
  2687. >>> # Example 1: Default conversion (weighted average, 3 channels)
  2688. >>> transform = A.ToGray(p=1.0)
  2689. >>> result = transform(image=image)
  2690. >>> gray_image = result['image']
  2691. >>> # Output has 3 duplicate channels with values based on RGB perception weights
  2692. >>> # R=0.299, G=0.587, B=0.114
  2693. >>> assert gray_image.shape == (100, 100, 3)
  2694. >>> assert np.allclose(gray_image[:, :, 0], gray_image[:, :, 1])
  2695. >>> assert np.allclose(gray_image[:, :, 1], gray_image[:, :, 2])
  2696. >>>
  2697. >>> # Example 2: Single-channel output
  2698. >>> transform = A.ToGray(num_output_channels=1, p=1.0)
  2699. >>> result = transform(image=image)
  2700. >>> gray_image = result['image']
  2701. >>> assert gray_image.shape == (100, 100, 1)
  2702. >>>
  2703. >>> # Example 3: Using different conversion methods
  2704. >>> # "desaturation" method (min+max)/2
  2705. >>> transform_desaturate = A.ToGray(
  2706. ... method="desaturation",
  2707. ... p=1.0
  2708. ... )
  2709. >>> result = transform_desaturate(image=image)
  2710. >>> gray_desaturate = result['image']
  2711. >>>
  2712. >>> # "from_lab" method (using L channel from LAB colorspace)
  2713. >>> transform_lab = A.ToGray(
  2714. ... method="from_lab",
  2715. ... p=1.0
  2716. >>> )
  2717. >>> result = transform_lab(image=image)
  2718. >>> gray_lab = result['image']
  2719. >>>
  2720. >>> # "average" method (simple average of channels)
  2721. >>> transform_avg = A.ToGray(
  2722. ... method="average",
  2723. ... p=1.0
  2724. >>> )
  2725. >>> result = transform_avg(image=image)
  2726. >>> gray_avg = result['image']
  2727. >>>
  2728. >>> # "max" method (takes max value across channels)
  2729. >>> transform_max = A.ToGray(
  2730. ... method="max",
  2731. ... p=1.0
  2732. >>> )
  2733. >>> result = transform_max(image=image)
  2734. >>> gray_max = result['image']
  2735. >>>
  2736. >>> # Example 4: Using grayscale in an augmentation pipeline
  2737. >>> pipeline = A.Compose([
  2738. ... A.ToGray(p=0.5), # 50% chance of grayscale conversion
  2739. ... A.RandomBrightnessContrast(p=1.0) # Always apply brightness/contrast
  2740. ... ])
  2741. >>> result = pipeline(image=image)
  2742. >>> augmented_image = result['image'] # May be grayscale or color
  2743. >>>
  2744. >>> # Example 5: Converting float32 image
  2745. >>> float_image = image.astype(np.float32) / 255.0 # Range [0, 1]
  2746. >>> transform = A.ToGray(p=1.0)
  2747. >>> result = transform(image=float_image)
  2748. >>> gray_float_image = result['image']
  2749. >>> assert gray_float_image.dtype == np.float32
  2750. >>> assert gray_float_image.max() <= 1.0
  2751. """
  2752. class InitSchema(BaseTransformInitSchema):
  2753. num_output_channels: int = Field(
  2754. description="The number of output channels.",
  2755. ge=1,
  2756. )
  2757. method: Literal[
  2758. "weighted_average",
  2759. "from_lab",
  2760. "desaturation",
  2761. "average",
  2762. "max",
  2763. "pca",
  2764. ]
  2765. def __init__(
  2766. self,
  2767. num_output_channels: int = 3,
  2768. method: Literal[
  2769. "weighted_average",
  2770. "from_lab",
  2771. "desaturation",
  2772. "average",
  2773. "max",
  2774. "pca",
  2775. ] = "weighted_average",
  2776. p: float = 0.5,
  2777. ):
  2778. super().__init__(p=p)
  2779. self.num_output_channels = num_output_channels
  2780. self.method = method
  2781. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  2782. """Apply the ToGray transform to the input image.
  2783. Args:
  2784. img (np.ndarray): The input image to apply the ToGray transform to.
  2785. **params (Any): Additional parameters (not used in this transform).
  2786. Returns:
  2787. np.ndarray: The image with the applied ToGray transform.
  2788. """
  2789. if is_grayscale_image(img):
  2790. warnings.warn("The image is already gray.", stacklevel=2)
  2791. return img
  2792. num_channels = get_num_channels(img)
  2793. if num_channels != NUM_RGB_CHANNELS and self.method not in {
  2794. "desaturation",
  2795. "average",
  2796. "max",
  2797. "pca",
  2798. }:
  2799. msg = "ToGray transformation expects 3-channel images."
  2800. raise TypeError(msg)
  2801. return fpixel.to_gray(img, self.num_output_channels, self.method)
  2802. class ToRGB(ImageOnlyTransform):
  2803. """Convert an input image from grayscale to RGB format.
  2804. Args:
  2805. num_output_channels (int): The number of channels in the output image. Default: 3.
  2806. p (float): Probability of applying the transform. Default: 1.0.
  2807. Targets:
  2808. image, volume
  2809. Image types:
  2810. uint8, float32
  2811. Number of channels:
  2812. 1
  2813. Note:
  2814. - For single-channel (grayscale) images, the channel is replicated to create an RGB image.
  2815. - If the input is already a 3-channel RGB image, it is returned unchanged.
  2816. - This transform does not change the data type of the image (e.g., uint8 remains uint8).
  2817. Raises:
  2818. TypeError: If the input image has more than 1 channel.
  2819. Examples:
  2820. >>> import numpy as np
  2821. >>> import albumentations as A
  2822. >>>
  2823. >>> # Convert a grayscale image to RGB
  2824. >>> transform = A.Compose([A.ToRGB(p=1.0)])
  2825. >>> grayscale_image = np.random.randint(0, 256, (100, 100), dtype=np.uint8)
  2826. >>> rgb_image = transform(image=grayscale_image)['image']
  2827. >>> assert rgb_image.shape == (100, 100, 3)
  2828. """
  2829. class InitSchema(BaseTransformInitSchema):
  2830. num_output_channels: int = Field(ge=1)
  2831. def __init__(
  2832. self,
  2833. num_output_channels: int = 3,
  2834. p: float = 1.0,
  2835. ):
  2836. super().__init__(p=p)
  2837. self.num_output_channels = num_output_channels
  2838. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  2839. """Apply the ToRGB transform to the input image.
  2840. Args:
  2841. img (np.ndarray): The input image to apply the ToRGB transform to.
  2842. **params (Any): Additional parameters (not used in this transform).
  2843. Returns:
  2844. np.ndarray: The image with the applied ToRGB transform.
  2845. """
  2846. if is_rgb_image(img):
  2847. warnings.warn("The image is already an RGB.", stacklevel=2)
  2848. return np.ascontiguousarray(img)
  2849. if not is_grayscale_image(img):
  2850. msg = "ToRGB transformation expects 2-dim images or 3-dim with the last dimension equal to 1."
  2851. raise TypeError(msg)
  2852. return fpixel.grayscale_to_multichannel(
  2853. img,
  2854. num_output_channels=self.num_output_channels,
  2855. )
  2856. class ToSepia(ImageOnlyTransform):
  2857. """Apply a sepia filter to the input image.
  2858. This transform converts a color image to a sepia tone, giving it a warm, brownish tint
  2859. that is reminiscent of old photographs. The sepia effect is achieved by applying a
  2860. specific color transformation matrix to the RGB channels of the input image.
  2861. For grayscale images, the transform is a no-op and returns the original image.
  2862. Args:
  2863. p (float): Probability of applying the transform. Default: 0.5.
  2864. Targets:
  2865. image, volume
  2866. Image types:
  2867. uint8, float32
  2868. Number of channels:
  2869. 1,3
  2870. Note:
  2871. - The sepia effect only works with RGB images (3 channels). For grayscale images,
  2872. the original image is returned unchanged since the sepia transformation would
  2873. have no visible effect when R=G=B.
  2874. - The sepia effect is created using a fixed color transformation matrix:
  2875. [[0.393, 0.769, 0.189],
  2876. [0.349, 0.686, 0.168],
  2877. [0.272, 0.534, 0.131]]
  2878. - The output image will have the same data type as the input image.
  2879. - For float32 images, ensure the input values are in the range [0, 1].
  2880. Examples:
  2881. >>> import numpy as np
  2882. >>> import albumentations as A
  2883. >>>
  2884. # Apply sepia effect to a uint8 RGB image
  2885. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  2886. >>> transform = A.ToSepia(p=1.0)
  2887. >>> sepia_image = transform(image=image)['image']
  2888. >>> assert sepia_image.shape == image.shape
  2889. >>> assert sepia_image.dtype == np.uint8
  2890. >>>
  2891. # Apply sepia effect to a float32 RGB image
  2892. >>> image = np.random.rand(100, 100, 3).astype(np.float32)
  2893. >>> transform = A.ToSepia(p=1.0)
  2894. >>> sepia_image = transform(image=image)['image']
  2895. >>> assert sepia_image.shape == image.shape
  2896. >>> assert sepia_image.dtype == np.float32
  2897. >>> assert 0 <= sepia_image.min() <= sepia_image.max() <= 1.0
  2898. >>>
  2899. # No effect on grayscale images
  2900. >>> gray_image = np.random.randint(0, 256, (100, 100), dtype=np.uint8)
  2901. >>> transform = A.ToSepia(p=1.0)
  2902. >>> result = transform(image=gray_image)['image']
  2903. >>> assert np.array_equal(result, gray_image)
  2904. Mathematical Formulation:
  2905. Given an input pixel [R, G, B], the sepia tone is calculated as:
  2906. R_sepia = 0.393*R + 0.769*G + 0.189*B
  2907. G_sepia = 0.349*R + 0.686*G + 0.168*B
  2908. B_sepia = 0.272*R + 0.534*G + 0.131*B
  2909. For grayscale images where R=G=B, this transformation would result in a simple
  2910. scaling of the original value, so we skip it.
  2911. The output values are clipped to the valid range for the image's data type.
  2912. See Also:
  2913. ToGray: For converting images to grayscale instead of sepia.
  2914. """
  2915. def __init__(self, p: float = 0.5):
  2916. super().__init__(p=p)
  2917. self.sepia_transformation_matrix = np.array(
  2918. [[0.393, 0.769, 0.189], [0.349, 0.686, 0.168], [0.272, 0.534, 0.131]],
  2919. )
  2920. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  2921. """Apply the ToSepia transform to the input image.
  2922. Args:
  2923. img (np.ndarray): The input image to apply the ToSepia transform to.
  2924. **params (Any): Additional parameters (not used in this transform).
  2925. Returns:
  2926. np.ndarray: The image with the applied ToSepia transform.
  2927. """
  2928. if is_grayscale_image(img):
  2929. return img
  2930. if not is_rgb_image(img):
  2931. msg = "ToSepia transformation expects 1 or 3-channel images."
  2932. raise TypeError(msg)
  2933. return fpixel.linear_transformation_rgb(img, self.sepia_transformation_matrix)
class InterpolationPydantic(BaseModel):
    # Model holding a pair of OpenCV interpolation flags. Each field accepts only
    # the cv2.INTER_* constants enumerated in its Literal.
    # NOTE(review): nothing in the surrounding transforms references this model;
    # Downscale.InitSchema spells out the same flag list inline - confirm it is still used.

    # Flag named for the upscaling direction.
    upscale: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_NEAREST_EXACT,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
        cv2.INTER_LINEAR_EXACT,
    ]
    # Flag named for the downscaling direction.
    downscale: Literal[
        cv2.INTER_NEAREST,
        cv2.INTER_NEAREST_EXACT,
        cv2.INTER_LINEAR,
        cv2.INTER_CUBIC,
        cv2.INTER_AREA,
        cv2.INTER_LANCZOS4,
        cv2.INTER_LINEAR_EXACT,
    ]
  2953. class Downscale(ImageOnlyTransform):
  2954. """Decrease image quality by downscaling and upscaling back.
  2955. This transform simulates the effect of a low-resolution image by first downscaling
  2956. the image to a lower resolution and then upscaling it back to its original size.
  2957. This process introduces loss of detail and can be used to simulate low-quality
  2958. images or to test the robustness of models to different image resolutions.
  2959. Args:
  2960. scale_range (tuple[float, float]): Range for the downscaling factor.
  2961. Should be two float values between 0 and 1, where the first value is less than or equal to the second.
  2962. The actual downscaling factor will be randomly chosen from this range for each image.
  2963. Lower values result in more aggressive downscaling.
  2964. Default: (0.25, 0.25)
  2965. interpolation_pair (dict[Literal["downscale", "upscale"], int]): A dictionary specifying
  2966. the interpolation methods to use for downscaling and upscaling.
  2967. Should contain two keys:
  2968. - 'downscale': Interpolation method for downscaling
  2969. - 'upscale': Interpolation method for upscaling
  2970. Values should be OpenCV interpolation flags (e.g., cv2.INTER_NEAREST, cv2.INTER_LINEAR, etc.)
  2971. Default: {'downscale': cv2.INTER_NEAREST, 'upscale': cv2.INTER_NEAREST}
  2972. p (float): Probability of applying the transform. Should be in the range [0, 1].
  2973. Default: 0.5
  2974. Targets:
  2975. image, volume
  2976. Image types:
  2977. uint8, float32
  2978. Note:
  2979. - The actual downscaling factor is randomly chosen for each image from the range
  2980. specified in scale_range.
  2981. - Using different interpolation methods for downscaling and upscaling can produce
  2982. various effects. For example, using INTER_NEAREST for both can create a pixelated look,
  2983. while using INTER_LINEAR or INTER_CUBIC can produce smoother results.
  2984. - This transform can be useful for data augmentation, especially when training models
  2985. that need to be robust to variations in image quality or resolution.
  2986. Examples:
  2987. >>> import albumentations as A
  2988. >>> import cv2
  2989. >>> transform = A.Downscale(
  2990. ... scale_range=(0.5, 0.75),
  2991. ... interpolation_pair={'downscale': cv2.INTER_NEAREST, 'upscale': cv2.INTER_LINEAR},
  2992. ... p=0.5
  2993. ... )
  2994. >>> transformed = transform(image=image)
  2995. >>> downscaled_image = transformed['image']
  2996. """
  2997. class InitSchema(BaseTransformInitSchema):
  2998. interpolation_pair: dict[
  2999. Literal["downscale", "upscale"],
  3000. Literal[
  3001. cv2.INTER_NEAREST,
  3002. cv2.INTER_NEAREST_EXACT,
  3003. cv2.INTER_LINEAR,
  3004. cv2.INTER_CUBIC,
  3005. cv2.INTER_AREA,
  3006. cv2.INTER_LANCZOS4,
  3007. cv2.INTER_LINEAR_EXACT,
  3008. ],
  3009. ]
  3010. scale_range: Annotated[
  3011. tuple[float, float],
  3012. AfterValidator(check_range_bounds(0, 1)),
  3013. AfterValidator(nondecreasing),
  3014. ]
  3015. def __init__(
  3016. self,
  3017. scale_range: tuple[float, float] = (0.25, 0.25),
  3018. interpolation_pair: dict[
  3019. Literal["downscale", "upscale"],
  3020. Literal[
  3021. cv2.INTER_NEAREST,
  3022. cv2.INTER_NEAREST_EXACT,
  3023. cv2.INTER_LINEAR,
  3024. cv2.INTER_CUBIC,
  3025. cv2.INTER_AREA,
  3026. cv2.INTER_LANCZOS4,
  3027. cv2.INTER_LINEAR_EXACT,
  3028. ],
  3029. ] = {"upscale": cv2.INTER_NEAREST, "downscale": cv2.INTER_NEAREST},
  3030. p: float = 0.5,
  3031. ):
  3032. super().__init__(p=p)
  3033. self.scale_range = scale_range
  3034. self.interpolation_pair = interpolation_pair
  3035. def apply(self, img: np.ndarray, scale: float, **params: Any) -> np.ndarray:
  3036. """Apply the Downscale transform to the input image.
  3037. Args:
  3038. img (np.ndarray): The input image to apply the Downscale transform to.
  3039. scale (float): The downscaling factor.
  3040. **params (Any): Additional parameters (not used in this transform).
  3041. Returns:
  3042. np.ndarray: The image with the applied Downscale transform.
  3043. """
  3044. return fpixel.downscale(
  3045. img,
  3046. scale=scale,
  3047. down_interpolation=self.interpolation_pair["downscale"],
  3048. up_interpolation=self.interpolation_pair["upscale"],
  3049. )
  3050. def get_params(self) -> dict[str, Any]:
  3051. """Generate parameters dependent on the input data.
  3052. Returns:
  3053. dict[str, Any]: Dictionary with the following key:
  3054. - "scale" (float): The downscaling factor.
  3055. """
  3056. return {"scale": self.py_random.uniform(*self.scale_range)}
  3057. class MultiplicativeNoise(ImageOnlyTransform):
  3058. """Apply multiplicative noise to the input image.
  3059. This transform multiplies each pixel in the image by a random value or array of values,
  3060. effectively creating a noise pattern that scales with the image intensity.
  3061. Args:
  3062. multiplier (tuple[float, float]): The range for the random multiplier.
  3063. Defines the range from which the multiplier is sampled.
  3064. Default: (0.9, 1.1)
  3065. per_channel (bool): If True, use a different random multiplier for each channel.
  3066. If False, use the same multiplier for all channels.
  3067. Setting this to False is slightly faster.
  3068. Default: False
  3069. elementwise (bool): If True, generates a unique multiplier for each pixel.
  3070. If False, generates a single multiplier (or one per channel if per_channel=True).
  3071. Default: False
  3072. p (float): Probability of applying the transform. Default: 0.5
  3073. Targets:
  3074. image, volume
  3075. Image types:
  3076. uint8, float32
  3077. Number of channels:
  3078. Any
  3079. Note:
  3080. - When elementwise=False and per_channel=False, a single multiplier is applied to the entire image.
  3081. - When elementwise=False and per_channel=True, each channel gets a different multiplier.
  3082. - When elementwise=True and per_channel=False, each pixel gets the same multiplier across all channels.
  3083. - When elementwise=True and per_channel=True, each pixel in each channel gets a unique multiplier.
  3084. - Setting per_channel=False is slightly faster, especially for larger images.
  3085. - This transform can be used to simulate various lighting conditions or to create noise that
  3086. scales with image intensity.
  3087. Examples:
  3088. >>> import numpy as np
  3089. >>> import albumentations as A
  3090. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3091. >>> transform = A.MultiplicativeNoise(multiplier=(0.9, 1.1), per_channel=True, p=1.0)
  3092. >>> result = transform(image=image)
  3093. >>> noisy_image = result["image"]
  3094. References:
  3095. Multiplicative noise: https://en.wikipedia.org/wiki/Multiplicative_noise
  3096. """
  3097. class InitSchema(BaseTransformInitSchema):
  3098. multiplier: Annotated[
  3099. tuple[float, float],
  3100. AfterValidator(check_range_bounds(0, None)),
  3101. AfterValidator(nondecreasing),
  3102. ]
  3103. per_channel: bool
  3104. elementwise: bool
  3105. def __init__(
  3106. self,
  3107. multiplier: tuple[float, float] | float = (0.9, 1.1),
  3108. per_channel: bool = False,
  3109. elementwise: bool = False,
  3110. p: float = 0.5,
  3111. ):
  3112. super().__init__(p=p)
  3113. self.multiplier = cast("tuple[float, float]", multiplier)
  3114. self.elementwise = elementwise
  3115. self.per_channel = per_channel
  3116. def apply(
  3117. self,
  3118. img: np.ndarray,
  3119. multiplier: float | np.ndarray,
  3120. **kwargs: Any,
  3121. ) -> np.ndarray:
  3122. """Apply the MultiplicativeNoise transform to the input image.
  3123. Args:
  3124. img (np.ndarray): The input image to apply the MultiplicativeNoise transform to.
  3125. multiplier (float | np.ndarray): The random multiplier.
  3126. **kwargs (Any): Additional parameters (not used in this transform).
  3127. Returns:
  3128. np.ndarray: The image with the applied MultiplicativeNoise transform.
  3129. """
  3130. return multiply(img, multiplier)
  3131. def get_params_dependent_on_data(
  3132. self,
  3133. params: dict[str, Any],
  3134. data: dict[str, Any],
  3135. ) -> dict[str, Any]:
  3136. """Generate parameters dependent on the input data.
  3137. Args:
  3138. params (dict[str, Any]): The parameters of the transform.
  3139. data (dict[str, Any]): The data to apply the transform to.
  3140. Returns:
  3141. dict[str, Any]: The parameters of the transform.
  3142. """
  3143. image = data["image"] if "image" in data else data["images"][0]
  3144. num_channels = get_num_channels(image)
  3145. if self.elementwise:
  3146. shape = image.shape if self.per_channel else (*image.shape[:2], 1)
  3147. else:
  3148. shape = (num_channels,) if self.per_channel else (1,)
  3149. multiplier = self.random_generator.uniform(
  3150. self.multiplier[0],
  3151. self.multiplier[1],
  3152. shape,
  3153. ).astype(np.float32)
  3154. if not self.per_channel and num_channels > 1:
  3155. # Replicate the multiplier for all channels if not per_channel
  3156. multiplier = np.repeat(multiplier, num_channels, axis=-1)
  3157. if not self.elementwise and self.per_channel:
  3158. # Reshape to broadcast correctly when not elementwise but per_channel
  3159. multiplier = multiplier.reshape(1, 1, -1)
  3160. if multiplier.shape != image.shape:
  3161. multiplier = multiplier.squeeze()
  3162. return {"multiplier": multiplier}
  3163. class FancyPCA(ImageOnlyTransform):
  3164. """Apply Fancy PCA augmentation to the input image.
  3165. This augmentation technique applies PCA (Principal Component Analysis) to the image's color channels,
  3166. then adds multiples of the principal components to the image, with magnitudes proportional to the
  3167. corresponding eigenvalues times a random variable drawn from a Gaussian with mean 0 and standard
  3168. deviation 'alpha'.
  3169. Args:
  3170. alpha (float): Standard deviation of the Gaussian distribution used to generate
  3171. random noise for each principal component. Default: 0.1.
  3172. p (float): Probability of applying the transform. Default: 0.5.
  3173. Targets:
  3174. image, volume
  3175. Image types:
  3176. uint8, float32
  3177. Number of channels:
  3178. any
  3179. Note:
  3180. - This augmentation is particularly effective for RGB images but can work with any number of channels.
  3181. - For grayscale images, it applies a simplified version of the augmentation.
  3182. - The transform preserves the mean of the image while adjusting the color/intensity variation.
  3183. - This implementation is based on the paper by Krizhevsky et al. and is similar to the one used
  3184. in the original AlexNet paper.
  3185. Examples:
  3186. >>> import numpy as np
  3187. >>> import albumentations as A
  3188. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3189. >>> transform = A.FancyPCA(alpha=0.1, p=1.0)
  3190. >>> result = transform(image=image)
  3191. >>> augmented_image = result["image"]
  3192. References:
  3193. ImageNet Classification with Deep Convolutional Neural Networks: In Advances in Neural Information
  3194. Processing Systems (Vol. 25). Curran Associates, Inc.
  3195. """
  3196. class InitSchema(BaseTransformInitSchema):
  3197. alpha: float = Field(ge=0)
  3198. def __init__(
  3199. self,
  3200. alpha: float = 0.1,
  3201. p: float = 0.5,
  3202. ):
  3203. super().__init__(p=p)
  3204. self.alpha = alpha
  3205. def apply(
  3206. self,
  3207. img: np.ndarray,
  3208. alpha_vector: np.ndarray,
  3209. **params: Any,
  3210. ) -> np.ndarray:
  3211. """Apply the FancyPCA transform to the input image.
  3212. Args:
  3213. img (np.ndarray): The input image to apply the FancyPCA transform to.
  3214. alpha_vector (np.ndarray): The random noise for each principal component.
  3215. **params (Any): Additional parameters (not used in this transform).
  3216. Returns:
  3217. np.ndarray: The image with the applied FancyPCA transform.
  3218. """
  3219. return fpixel.fancy_pca(img, alpha_vector)
  3220. def get_params_dependent_on_data(
  3221. self,
  3222. params: dict[str, Any],
  3223. data: dict[str, Any],
  3224. ) -> dict[str, Any]:
  3225. """Generate parameters dependent on the input data.
  3226. Args:
  3227. params (dict[str, Any]): The parameters of the transform.
  3228. data (dict[str, Any]): The data to apply the transform to.
  3229. Returns:
  3230. dict[str, Any]: The parameters of the transform.
  3231. """
  3232. shape = params["shape"]
  3233. num_channels = shape[-1] if len(shape) == NUM_MULTI_CHANNEL_DIMENSIONS else 1
  3234. alpha_vector = self.random_generator.normal(0, self.alpha, num_channels).astype(
  3235. np.float32,
  3236. )
  3237. return {"alpha_vector": alpha_vector}
  3238. class ColorJitter(ImageOnlyTransform):
  3239. """Randomly changes the brightness, contrast, saturation, and hue of an image.
  3240. This transform is similar to torchvision's ColorJitter but with some differences due to the use of OpenCV
  3241. instead of Pillow. The main differences are:
  3242. 1. OpenCV and Pillow use different formulas to convert images to HSV format.
  3243. 2. This implementation uses value saturation instead of uint8 overflow as in Pillow.
  3244. These differences may result in slightly different output compared to torchvision's ColorJitter.
  3245. Args:
  3246. brightness (tuple[float, float] | float): How much to jitter brightness.
  3247. If float:
  3248. The brightness factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
  3249. If tuple:
  3250. The brightness factor is sampled from the range specified.
  3251. Should be non-negative numbers.
  3252. Default: (0.8, 1.2)
  3253. contrast (tuple[float, float] | float): How much to jitter contrast.
  3254. If float:
  3255. The contrast factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
  3256. If tuple:
  3257. The contrast factor is sampled from the range specified.
  3258. Should be non-negative numbers.
  3259. Default: (0.8, 1.2)
  3260. saturation (tuple[float, float] | float): How much to jitter saturation.
  3261. If float:
  3262. The saturation factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
  3263. If tuple:
  3264. The saturation factor is sampled from the range specified.
  3265. Should be non-negative numbers.
  3266. Default: (0.8, 1.2)
  3267. hue (float or tuple of float (min, max)): How much to jitter hue.
  3268. If float:
  3269. The hue factor is chosen uniformly from [-hue, hue]. Should have 0 <= hue <= 0.5.
  3270. If tuple:
  3271. The hue factor is sampled from the range specified. Values should be in range [-0.5, 0.5].
  3272. Default: (-0.5, 0.5)
  3273. p (float): Probability of applying the transform. Should be in the range [0, 1].
  3274. Default: 0.5
  3275. Targets:
  3276. image, volume
  3277. Image types:
  3278. uint8, float32
  3279. Number of channels:
  3280. 1, 3
  3281. Note:
  3282. - The order of application for these color transformations is random for each image.
  3283. - The ranges for brightness, contrast, and saturation are applied as multiplicative factors.
  3284. - The range for hue is applied as an additive factor.
  3285. Examples:
  3286. >>> import numpy as np
  3287. >>> import albumentations as A
  3288. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3289. >>> transform = A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=1.0)
  3290. >>> result = transform(image=image)
  3291. >>> jittered_image = result['image']
  3292. References:
  3293. - ColorJitter: https://pytorch.org/vision/stable/generated/torchvision.transforms.ColorJitter.html
  3294. - Color Conversions: https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html
  3295. """
  3296. class InitSchema(BaseTransformInitSchema):
  3297. brightness: tuple[float, float] | float
  3298. contrast: tuple[float, float] | float
  3299. saturation: tuple[float, float] | float
  3300. hue: tuple[float, float] | float
  3301. @field_validator("brightness", "contrast", "saturation", "hue")
  3302. @classmethod
  3303. def _check_ranges(
  3304. cls,
  3305. value: tuple[float, float] | float,
  3306. info: ValidationInfo,
  3307. ) -> tuple[float, float]:
  3308. if info.field_name == "hue":
  3309. bounds = -0.5, 0.5
  3310. bias = 0
  3311. clip = False
  3312. elif info.field_name in ["brightness", "contrast", "saturation"]:
  3313. bounds = 0, float("inf")
  3314. bias = 1
  3315. clip = True
  3316. if isinstance(value, numbers.Number):
  3317. if value < 0:
  3318. raise ValueError(
  3319. f"If {info.field_name} is a single number, it must be non negative.",
  3320. )
  3321. left = bias - value
  3322. if clip:
  3323. left = max(left, 0)
  3324. value = (left, bias + value)
  3325. elif isinstance(value, tuple) and len(value) == PAIR:
  3326. check_range(value, *bounds, info.field_name)
  3327. return cast("tuple[float, float]", value)
  3328. def __init__(
  3329. self,
  3330. brightness: tuple[float, float] | float = (0.8, 1.2),
  3331. contrast: tuple[float, float] | float = (0.8, 1.2),
  3332. saturation: tuple[float, float] | float = (0.8, 1.2),
  3333. hue: tuple[float, float] | float = (-0.5, 0.5),
  3334. p: float = 0.5,
  3335. ):
  3336. super().__init__(p=p)
  3337. self.brightness = cast("tuple[float, float]", brightness)
  3338. self.contrast = cast("tuple[float, float]", contrast)
  3339. self.saturation = cast("tuple[float, float]", saturation)
  3340. self.hue = cast("tuple[float, float]", hue)
  3341. self.transforms = [
  3342. fpixel.adjust_brightness_torchvision,
  3343. fpixel.adjust_contrast_torchvision,
  3344. fpixel.adjust_saturation_torchvision,
  3345. fpixel.adjust_hue_torchvision,
  3346. ]
  3347. def get_params(self) -> dict[str, Any]:
  3348. """Generate parameters for the ColorJitter transform.
  3349. Returns:
  3350. dict[str, Any]: The parameters of the transform.
  3351. """
  3352. brightness = self.py_random.uniform(*self.brightness)
  3353. contrast = self.py_random.uniform(*self.contrast)
  3354. saturation = self.py_random.uniform(*self.saturation)
  3355. hue = self.py_random.uniform(*self.hue)
  3356. order = [0, 1, 2, 3]
  3357. self.random_generator.shuffle(order)
  3358. return {
  3359. "brightness": brightness,
  3360. "contrast": contrast,
  3361. "saturation": saturation,
  3362. "hue": hue,
  3363. "order": order,
  3364. }
  3365. def apply(
  3366. self,
  3367. img: np.ndarray,
  3368. brightness: float,
  3369. contrast: float,
  3370. saturation: float,
  3371. hue: float,
  3372. order: list[int],
  3373. **params: Any,
  3374. ) -> np.ndarray:
  3375. """Apply the ColorJitter transform to the input image.
  3376. Args:
  3377. img (np.ndarray): The input image to apply the ColorJitter transform to.
  3378. brightness (float): The brightness factor.
  3379. contrast (float): The contrast factor.
  3380. saturation (float): The saturation factor.
  3381. hue (float): The hue factor.
  3382. order (list[int]): The order of application for the color transformations.
  3383. **params (Any): Additional parameters (not used in this transform).
  3384. Returns:
  3385. np.ndarray: The image with the applied ColorJitter transform.
  3386. """
  3387. if not is_rgb_image(img) and not is_grayscale_image(img):
  3388. msg = "ColorJitter transformation expects 1-channel or 3-channel images."
  3389. raise TypeError(msg)
  3390. color_transforms = [brightness, contrast, saturation, hue]
  3391. for i in order:
  3392. img = self.transforms[i](img, color_transforms[i])
  3393. return img
  3394. class Sharpen(ImageOnlyTransform):
  3395. """Sharpen the input image using either kernel-based or Gaussian interpolation method.
  3396. Implements two different approaches to image sharpening:
  3397. 1. Traditional kernel-based method using Laplacian operator
  3398. 2. Gaussian interpolation method (similar to Kornia's approach)
  3399. Args:
  3400. alpha (tuple[float, float]): Range for the visibility of sharpening effect.
  3401. At 0, only the original image is visible, at 1.0 only its processed version is visible.
  3402. Values should be in the range [0, 1].
  3403. Used in both methods. Default: (0.2, 0.5).
  3404. lightness (tuple[float, float]): Range for the lightness of the sharpened image.
  3405. Only used in 'kernel' method. Larger values create higher contrast.
  3406. Values should be greater than 0. Default: (0.5, 1.0).
  3407. method (Literal['kernel', 'gaussian']): Sharpening algorithm to use:
  3408. - 'kernel': Traditional kernel-based sharpening using Laplacian operator
  3409. - 'gaussian': Interpolation between Gaussian blurred and original image
  3410. Default: 'kernel'
  3411. kernel_size (int): Size of the Gaussian blur kernel for 'gaussian' method.
  3412. Must be odd. Default: 5
  3413. sigma (float): Standard deviation for Gaussian kernel in 'gaussian' method.
  3414. Default: 1.0
  3415. p (float): Probability of applying the transform. Default: 0.5.
  3416. Image types:
  3417. uint8, float32
  3418. Number of channels:
  3419. Any
  3420. Mathematical Formulation:
  3421. 1. Kernel Method:
  3422. The sharpening operation is based on the Laplacian operator L:
  3423. L = [[-1, -1, -1],
  3424. [-1, 8, -1],
  3425. [-1, -1, -1]]
  3426. The final kernel K is a weighted sum:
  3427. K = (1 - a)I + a(L + λI)
  3428. where:
  3429. - a is the alpha value
  3430. - λ is the lightness value
  3431. - I is the identity kernel
  3432. The output image O is computed as:
  3433. O = K * I (convolution)
  3434. 2. Gaussian Method:
  3435. Based on the unsharp mask principle:
  3436. O = aI + (1-a)G
  3437. where:
  3438. - I is the input image
  3439. - G is the Gaussian blurred version of I
  3440. - a is the alpha value (sharpness)
  3441. The Gaussian kernel G(x,y) is defined as:
  3442. G(x,y) = (1/(2πs²))exp(-(x²+y²)/(2s²))
  3443. Note:
  3444. - Kernel sizes must be odd to maintain spatial alignment
  3445. - Methods produce different visual results:
  3446. * Kernel method: More pronounced edges, possible artifacts
  3447. * Gaussian method: More natural look, limited to original sharpness
  3448. Examples:
  3449. >>> import albumentations as A
  3450. >>> import numpy as np
  3451. # Traditional kernel sharpening
  3452. >>> transform = A.Sharpen(
  3453. ... alpha=(0.2, 0.5),
  3454. ... lightness=(0.5, 1.0),
  3455. ... method='kernel',
  3456. ... p=1.0
  3457. ... )
  3458. # Gaussian interpolation sharpening
  3459. >>> transform = A.Sharpen(
  3460. ... alpha=(0.5, 1.0),
  3461. ... method='gaussian',
  3462. ... kernel_size=5,
  3463. ... sigma=1.0,
  3464. ... p=1.0
  3465. ... )
  3466. References:
  3467. - R. C. Gonzalez and R. E. Woods, "Digital Image Processing (4th Edition),": Chapter 3:
  3468. Intensity Transformations and Spatial Filtering.
  3469. - J. C. Russ, "The Image Processing Handbook (7th Edition),": Chapter 4: Image Enhancement.
  3470. - T. Acharya and A. K. Ray, "Image Processing: Principles and Applications,": Chapter 5: Image Enhancement.
  3471. - Unsharp masking: https://en.wikipedia.org/wiki/Unsharp_masking
  3472. - Laplacian operator: https://en.wikipedia.org/wiki/Laplace_operator
  3473. - Gaussian blur: https://en.wikipedia.org/wiki/Gaussian_blur
  3474. See Also:
  3475. - Blur: For Gaussian blurring
  3476. - UnsharpMask: Alternative sharpening method
  3477. - RandomBrightnessContrast: For adjusting image contrast
  3478. """
  3479. class InitSchema(BaseTransformInitSchema):
  3480. alpha: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))]
  3481. lightness: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, None))]
  3482. method: Literal["kernel", "gaussian"]
  3483. kernel_size: int = Field(ge=3)
  3484. sigma: float = Field(gt=0)
  3485. @field_validator("kernel_size")
  3486. @classmethod
  3487. def _check_kernel_size(cls, value: int) -> int:
  3488. return value + 1 if value % 2 == 0 else value
  3489. def __init__(
  3490. self,
  3491. alpha: tuple[float, float] = (0.2, 0.5),
  3492. lightness: tuple[float, float] = (0.5, 1.0),
  3493. method: Literal["kernel", "gaussian"] = "kernel",
  3494. kernel_size: int = 5,
  3495. sigma: float = 1.0,
  3496. p: float = 0.5,
  3497. ):
  3498. super().__init__(p=p)
  3499. self.alpha = alpha
  3500. self.lightness = lightness
  3501. self.method = method
  3502. self.kernel_size = kernel_size
  3503. self.sigma = sigma
  3504. @staticmethod
  3505. def __generate_sharpening_matrix(
  3506. alpha: np.ndarray,
  3507. lightness: np.ndarray,
  3508. ) -> np.ndarray:
  3509. matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
  3510. matrix_effect = np.array(
  3511. [[-1, -1, -1], [-1, 8 + lightness, -1], [-1, -1, -1]],
  3512. dtype=np.float32,
  3513. )
  3514. return (1 - alpha) * matrix_nochange + alpha * matrix_effect
  3515. def get_params(self) -> dict[str, Any]:
  3516. """Generate parameters for the Sharpen transform.
  3517. Returns:
  3518. dict[str, Any]: The parameters of the transform.
  3519. """
  3520. alpha = self.py_random.uniform(*self.alpha)
  3521. if self.method == "kernel":
  3522. lightness = self.py_random.uniform(*self.lightness)
  3523. return {
  3524. "alpha": alpha,
  3525. "sharpening_matrix": self.__generate_sharpening_matrix(
  3526. alpha,
  3527. lightness,
  3528. ),
  3529. }
  3530. return {"alpha": alpha, "sharpening_matrix": None}
  3531. def apply(
  3532. self,
  3533. img: np.ndarray,
  3534. alpha: float,
  3535. sharpening_matrix: np.ndarray | None,
  3536. **params: Any,
  3537. ) -> np.ndarray:
  3538. """Apply the Sharpen transform to the input image.
  3539. Args:
  3540. img (np.ndarray): The input image to apply the Sharpen transform to.
  3541. alpha (float): The alpha value.
  3542. sharpening_matrix (np.ndarray | None): The sharpening matrix.
  3543. **params (Any): Additional parameters for the transform.
  3544. """
  3545. if self.method == "kernel":
  3546. return fpixel.convolve(img, sharpening_matrix)
  3547. return fpixel.sharpen_gaussian(img, alpha, self.kernel_size, self.sigma)
  3548. class Emboss(ImageOnlyTransform):
  3549. """Apply embossing effect to the input image.
  3550. This transform creates an emboss effect by highlighting edges and creating a 3D-like texture
  3551. in the image. It works by applying a specific convolution kernel to the image that emphasizes
  3552. differences in adjacent pixel values.
  3553. Args:
  3554. alpha (tuple[float, float]): Range to choose the visibility of the embossed image.
  3555. At 0, only the original image is visible, at 1.0 only its embossed version is visible.
  3556. Values should be in the range [0, 1].
  3557. Alpha will be randomly selected from this range for each image.
  3558. Default: (0.2, 0.5)
  3559. strength (tuple[float, float]): Range to choose the strength of the embossing effect.
  3560. Higher values create a more pronounced 3D effect.
  3561. Values should be non-negative.
  3562. Strength will be randomly selected from this range for each image.
  3563. Default: (0.2, 0.7)
  3564. p (float): Probability of applying the transform. Should be in the range [0, 1].
  3565. Default: 0.5
  3566. Targets:
  3567. image, volume
  3568. Image types:
  3569. uint8, float32
  3570. Note:
  3571. - The emboss effect is created using a 3x3 convolution kernel.
  3572. - The 'alpha' parameter controls the blend between the original image and the embossed version.
  3573. A higher alpha value will result in a more pronounced emboss effect.
  3574. - The 'strength' parameter affects the intensity of the embossing. Higher strength values
  3575. will create more contrast in the embossed areas, resulting in a stronger 3D-like effect.
  3576. - This transform can be useful for creating artistic effects or for data augmentation
  3577. in tasks where edge information is important.
  3578. Examples:
  3579. >>> import numpy as np
  3580. >>> import albumentations as A
  3581. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3582. >>> transform = A.Emboss(alpha=(0.2, 0.5), strength=(0.2, 0.7), p=0.5)
  3583. >>> result = transform(image=image)
  3584. >>> embossed_image = result['image']
  3585. References:
  3586. - Image Embossing: https://en.wikipedia.org/wiki/Image_embossing
  3587. - Application of Emboss Filtering in Image Processing: https://www.researchgate.net/publication/303412455_Application_of_Emboss_Filtering_in_Image_Processing
  3588. """
  3589. class InitSchema(BaseTransformInitSchema):
  3590. alpha: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))]
  3591. strength: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, None))]
  3592. def __init__(
  3593. self,
  3594. alpha: tuple[float, float] = (0.2, 0.5),
  3595. strength: tuple[float, float] = (0.2, 0.7),
  3596. p: float = 0.5,
  3597. ):
  3598. super().__init__(p=p)
  3599. self.alpha = alpha
  3600. self.strength = strength
  3601. @staticmethod
  3602. def __generate_emboss_matrix(
  3603. alpha_sample: np.ndarray,
  3604. strength_sample: np.ndarray,
  3605. ) -> np.ndarray:
  3606. matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
  3607. matrix_effect = np.array(
  3608. [
  3609. [-1 - strength_sample, 0 - strength_sample, 0],
  3610. [0 - strength_sample, 1, 0 + strength_sample],
  3611. [0, 0 + strength_sample, 1 + strength_sample],
  3612. ],
  3613. dtype=np.float32,
  3614. )
  3615. return (1 - alpha_sample) * matrix_nochange + alpha_sample * matrix_effect
  3616. def get_params(self) -> dict[str, np.ndarray]:
  3617. """Generate parameters for the Emboss transform.
  3618. Returns:
  3619. dict[str, np.ndarray]: The parameters of the transform.
  3620. """
  3621. alpha = self.py_random.uniform(*self.alpha)
  3622. strength = self.py_random.uniform(*self.strength)
  3623. emboss_matrix = self.__generate_emboss_matrix(
  3624. alpha_sample=alpha,
  3625. strength_sample=strength,
  3626. )
  3627. return {"emboss_matrix": emboss_matrix}
  3628. def apply(
  3629. self,
  3630. img: np.ndarray,
  3631. emboss_matrix: np.ndarray,
  3632. **params: Any,
  3633. ) -> np.ndarray:
  3634. """Apply the Emboss transform to the input image.
  3635. Args:
  3636. img (np.ndarray): The input image to apply the Emboss transform to.
  3637. emboss_matrix (np.ndarray): The emboss matrix.
  3638. **params (Any): Additional parameters for the transform.
  3639. """
  3640. return fpixel.convolve(img, emboss_matrix)
  3641. class Superpixels(ImageOnlyTransform):
  3642. """Transform images partially/completely to their superpixel representation.
  3643. Args:
  3644. p_replace (tuple[float, float] | float): Defines for any segment the probability that the pixels within that
  3645. segment are replaced by their average color (otherwise, the pixels are not changed).
  3646. * A probability of ``0.0`` would mean, that the pixels in no
  3647. segment are replaced by their average color (image is not
  3648. changed at all).
  3649. * A probability of ``0.5`` would mean, that around half of all
  3650. segments are replaced by their average color.
  3651. * A probability of ``1.0`` would mean, that all segments are
  3652. replaced by their average color (resulting in a voronoi
  3653. image).
  3654. Behavior based on chosen data types for this parameter:
  3655. * If a ``float``, then that ``float`` will always be used.
  3656. * If ``tuple`` ``(a, b)``, then a random probability will be
  3657. sampled from the interval ``[a, b]`` per image.
  3658. Default: (0.1, 0.3)
  3659. n_segments (tuple[int, int] | int): Rough target number of how many superpixels to generate.
  3660. The algorithm may deviate from this number.
  3661. Lower value will lead to coarser superpixels.
  3662. Higher values are computationally more intensive and will hence lead to a slowdown.
  3663. If tuple ``(a, b)``, then a value from the discrete interval ``[a..b]`` will be sampled per image.
  3664. Default: (15, 120)
  3665. max_size (int | None): Maximum image size at which the augmentation is performed.
  3666. If the width or height of an image exceeds this value, it will be
  3667. downscaled before the augmentation so that the longest side matches `max_size`.
  3668. This is done to speed up the process. The final output image has the same size as the input image.
  3669. Note that in case `p_replace` is below ``1.0``,
  3670. the down-/upscaling will affect the not-replaced pixels too.
  3671. Use ``None`` to apply no down-/upscaling.
  3672. Default: 128
  3673. interpolation (OpenCV flag): Flag that is used to specify the interpolation algorithm. Should be one of:
  3674. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  3675. Default: cv2.INTER_LINEAR.
  3676. p (float): Probability of applying the transform. Default: 0.5.
  3677. Targets:
  3678. image, volume
  3679. Image types:
  3680. uint8, float32
  3681. Number of channels:
  3682. Any
  3683. Note:
  3684. - This transform can significantly change the visual appearance of the image.
  3685. - The transform makes use of a superpixel algorithm, which tends to be slow.
  3686. If performance is a concern, consider using `max_size` to limit the image size.
  3687. - The effect of this transform can vary greatly depending on the `p_replace` and `n_segments` parameters.
  3688. - When `p_replace` is high, the image can become highly abstracted, resembling a voronoi diagram.
  3689. - The transform preserves the original image type (uint8 or float32).
  3690. Mathematical Formulation:
  3691. 1. The image is segmented into approximately `n_segments` superpixels using the SLIC algorithm.
  3692. 2. For each superpixel:
  3693. - With probability `p_replace`, all pixels in the superpixel are replaced with their mean color.
  3694. - With probability `1 - p_replace`, the superpixel is left unchanged.
  3695. 3. If the image was resized due to `max_size`, it is resized back to its original dimensions.
  3696. Examples:
  3697. >>> import numpy as np
  3698. >>> import albumentations as A
  3699. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3700. # Apply superpixels with default parameters
  3701. >>> transform = A.Superpixels(p=1.0)
  3702. >>> augmented_image = transform(image=image)['image']
  3703. # Apply superpixels with custom parameters
  3704. >>> transform = A.Superpixels(
  3705. ... p_replace=(0.5, 0.7),
  3706. ... n_segments=(50, 100),
  3707. ... max_size=None,
  3708. ... interpolation=cv2.INTER_NEAREST,
  3709. ... p=1.0
  3710. ... )
  3711. >>> augmented_image = transform(image=image)['image']
  3712. """
  3713. class InitSchema(BaseTransformInitSchema):
  3714. p_replace: ZeroOneRangeType
  3715. n_segments: OnePlusIntRangeType
  3716. max_size: int | None = Field(ge=1)
  3717. interpolation: Literal[
  3718. cv2.INTER_NEAREST,
  3719. cv2.INTER_NEAREST_EXACT,
  3720. cv2.INTER_LINEAR,
  3721. cv2.INTER_CUBIC,
  3722. cv2.INTER_AREA,
  3723. cv2.INTER_LANCZOS4,
  3724. cv2.INTER_LINEAR_EXACT,
  3725. ]
  3726. def __init__(
  3727. self,
  3728. p_replace: tuple[float, float] | float = (0, 0.1),
  3729. n_segments: tuple[int, int] | int = (100, 100),
  3730. max_size: int | None = 128,
  3731. interpolation: Literal[
  3732. cv2.INTER_NEAREST,
  3733. cv2.INTER_NEAREST_EXACT,
  3734. cv2.INTER_LINEAR,
  3735. cv2.INTER_CUBIC,
  3736. cv2.INTER_AREA,
  3737. cv2.INTER_LANCZOS4,
  3738. cv2.INTER_LINEAR_EXACT,
  3739. ] = cv2.INTER_LINEAR,
  3740. p: float = 0.5,
  3741. ):
  3742. super().__init__(p=p)
  3743. self.p_replace = cast("tuple[float, float]", p_replace)
  3744. self.n_segments = cast("tuple[int, int]", n_segments)
  3745. self.max_size = max_size
  3746. self.interpolation = interpolation
  3747. def get_params(self) -> dict[str, Any]:
  3748. """Generate parameters for the Superpixels transform.
  3749. Returns:
  3750. dict[str, Any]: The parameters of the transform.
  3751. """
  3752. n_segments = self.py_random.randint(*self.n_segments)
  3753. p = self.py_random.uniform(*self.p_replace)
  3754. return {
  3755. "replace_samples": self.random_generator.random(n_segments) < p,
  3756. "n_segments": n_segments,
  3757. }
  3758. def apply(
  3759. self,
  3760. img: np.ndarray,
  3761. replace_samples: Sequence[bool],
  3762. n_segments: int,
  3763. **kwargs: Any,
  3764. ) -> np.ndarray:
  3765. """Apply the Superpixels transform to the input image.
  3766. Args:
  3767. img (np.ndarray): The input image to apply the Superpixels transform to.
  3768. replace_samples (Sequence[bool]): Whether to replace pixels in segments.
  3769. n_segments (int): Number of superpixels.
  3770. **kwargs (Any): Additional parameters (not used in this transform).
  3771. Returns:
  3772. np.ndarray: The image with the applied Superpixels transform.
  3773. """
  3774. return fpixel.superpixels(
  3775. img,
  3776. n_segments,
  3777. replace_samples,
  3778. self.max_size,
  3779. self.interpolation,
  3780. )
  3781. class RingingOvershoot(ImageOnlyTransform):
  3782. """Create ringing or overshoot artifacts by convolving the image with a 2D sinc filter.
  3783. This transform simulates the ringing artifacts that can occur in digital image processing,
  3784. particularly after sharpening or edge enhancement operations. It creates oscillations
  3785. or overshoots near sharp transitions in the image.
  3786. Args:
  3787. blur_limit (tuple[int, int] | int): Maximum kernel size for the sinc filter.
  3788. Must be an odd number in the range [3, inf).
  3789. If a single int is provided, the kernel size will be randomly chosen
  3790. from the range (3, blur_limit). If a tuple (min, max) is provided,
  3791. the kernel size will be randomly chosen from the range (min, max).
  3792. Default: (7, 15).
  3793. cutoff (tuple[float, float]): Range to choose the cutoff frequency in radians.
  3794. Values should be in the range (0, π). A lower cutoff frequency will
  3795. result in more pronounced ringing effects.
  3796. Default: (π/4, π/2).
  3797. p (float): Probability of applying the transform. Default: 0.5.
  3798. Targets:
  3799. image, volume
  3800. Image types:
  3801. uint8, float32
  3802. Number of channels:
  3803. Any
  3804. Note:
  3805. - Ringing artifacts are oscillations of the image intensity function in the neighborhood
  3806. of sharp transitions, such as edges or object boundaries.
  3807. - This transform uses a 2D sinc filter (also known as a 2D cardinal sine function)
  3808. to introduce these artifacts.
  3809. - The severity of the ringing effect is controlled by both the kernel size (blur_limit)
  3810. and the cutoff frequency.
  3811. - Larger kernel sizes and lower cutoff frequencies will generally produce more
  3812. noticeable ringing effects.
  3813. - This transform can be useful for:
  3814. * Simulating imperfections in image processing or transmission systems
  3815. * Testing the robustness of computer vision models to ringing artifacts
  3816. * Creating artistic effects that emphasize edges and transitions in images
  3817. Mathematical Formulation:
  3818. The 2D sinc filter kernel is defined as:
  3819. K(x, y) = cutoff * J₁(cutoff * √(x² + y²)) / (2π * √(x² + y²))
  3820. where:
  3821. - J₁ is the Bessel function of the first kind of order 1
  3822. - cutoff is the chosen cutoff frequency
  3823. - x and y are the distances from the kernel center
  3824. The filtered image I' is obtained by convolving the input image I with the kernel K:
  3825. I'(x, y) = ∑∑ I(x-u, y-v) * K(u, v)
  3826. The convolution operation introduces the ringing artifacts near sharp transitions.
  3827. Examples:
  3828. >>> import numpy as np
  3829. >>> import albumentations as A
  3830. >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
  3831. # Apply ringing effect with default parameters
  3832. >>> transform = A.RingingOvershoot(p=1.0)
  3833. >>> ringing_image = transform(image=image)['image']
  3834. # Apply ringing effect with custom parameters
  3835. >>> transform = A.RingingOvershoot(
  3836. ... blur_limit=(9, 17),
  3837. ... cutoff=(np.pi/6, np.pi/3),
  3838. ... p=1.0
  3839. ... )
  3840. >>> ringing_image = transform(image=image)['image']
  3841. References:
  3842. - Ringing artifacts: https://en.wikipedia.org/wiki/Ringing_artifacts
  3843. - Sinc filter: https://en.wikipedia.org/wiki/Sinc_filter
  3844. - Digital Image Processing: Rafael C. Gonzalez and Richard E. Woods, 4th Edition
  3845. """
  3846. class InitSchema(BlurInitSchema):
  3847. blur_limit: tuple[int, int] | int
  3848. cutoff: Annotated[
  3849. tuple[float, float],
  3850. AfterValidator(check_range_bounds(0, np.pi)),
  3851. AfterValidator(nondecreasing),
  3852. ]
  3853. def __init__(
  3854. self,
  3855. blur_limit: tuple[int, int] | int = (7, 15),
  3856. cutoff: tuple[float, float] = (np.pi / 4, np.pi / 2),
  3857. p: float = 0.5,
  3858. ):
  3859. super().__init__(p=p)
  3860. self.blur_limit = cast("tuple[int, int]", blur_limit)
  3861. self.cutoff = cutoff
  3862. def get_params(self) -> dict[str, np.ndarray]:
  3863. """Generate parameters for the RingingOvershoot transform.
  3864. Returns:
  3865. dict[str, np.ndarray]: The parameters of the transform.
  3866. """
  3867. ksize = self.py_random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
  3868. if ksize % 2 == 0:
  3869. ksize += 1
  3870. cutoff = self.py_random.uniform(*self.cutoff)
  3871. # From dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
  3872. with np.errstate(divide="ignore", invalid="ignore"):
  3873. kernel = np.fromfunction(
  3874. lambda x, y: cutoff
  3875. * special.j1(
  3876. cutoff * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2),
  3877. )
  3878. / (2 * np.pi * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2)),
  3879. [ksize, ksize],
  3880. )
  3881. kernel[(ksize - 1) // 2, (ksize - 1) // 2] = cutoff**2 / (4 * np.pi)
  3882. # Normalize kernel
  3883. kernel = kernel.astype(np.float32) / np.sum(kernel)
  3884. return {"kernel": kernel}
  3885. def apply(self, img: np.ndarray, kernel: np.ndarray, **params: Any) -> np.ndarray:
  3886. """Apply the RingingOvershoot transform to the input image.
  3887. Args:
  3888. img (np.ndarray): The input image to apply the RingingOvershoot transform to.
  3889. kernel (np.ndarray): The kernel for the convolution.
  3890. **params (Any): Additional parameters (not used in this transform).
  3891. """
  3892. return fpixel.convolve(img, kernel)
  3893. class UnsharpMask(ImageOnlyTransform):
  3894. """Sharpen the input image using Unsharp Masking processing and overlays the result with the original image.
  3895. Unsharp masking is a technique that enhances edge contrast in an image, creating the illusion of increased
  3896. sharpness.
  3897. This transform applies Gaussian blur to create a blurred version of the image, then uses this to create a mask
  3898. which is combined with the original image to enhance edges and fine details.
  3899. Args:
  3900. blur_limit (tuple[int, int] | int): maximum Gaussian kernel size for blurring the input image.
  3901. Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma
  3902. as `round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1`.
  3903. If set single value `blur_limit` will be in range (0, blur_limit).
  3904. Default: (3, 7).
  3905. sigma_limit (tuple[float, float] | float): Gaussian kernel standard deviation. Must be more or equal to 0.
  3906. If set single value `sigma_limit` will be in range (0, sigma_limit).
  3907. If set to 0 sigma will be computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. Default: 0.
  3908. alpha (tuple[float, float]): range to choose the visibility of the sharpened image.
  3909. At 0, only the original image is visible, at 1.0 only its sharpened version is visible.
  3910. Default: (0.2, 0.5).
  3911. threshold (int): Value to limit sharpening only for areas with high pixel difference between original image
  3912. and it's smoothed version. Higher threshold means less sharpening on flat areas.
  3913. Must be in range [0, 255]. Default: 10.
  3914. p (float): probability of applying the transform. Default: 0.5.
  3915. Targets:
  3916. image, volume
  3917. Image types:
  3918. uint8, float32
  3919. Note:
  3920. - The algorithm creates a mask M = (I - G) * alpha, where I is the original image and G is the Gaussian
  3921. blurred version.
  3922. - The final image is computed as: output = I + M if |I - G| > threshold, else I.
  3923. - Higher alpha values increase the strength of the sharpening effect.
  3924. - Higher threshold values limit the sharpening effect to areas with more significant edges or details.
  3925. - The blur_limit and sigma_limit parameters control the Gaussian blur used to create the mask.
  3926. References:
  3927. Unsharp Masking: https://en.wikipedia.org/wiki/Unsharp_masking
  3928. Examples:
  3929. >>> import numpy as np
  3930. >>> import albumentations as A
  3931. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  3932. >>>
  3933. # Apply UnsharpMask with default parameters
  3934. >>> transform = A.UnsharpMask(p=1.0)
  3935. >>> sharpened_image = transform(image=image)['image']
  3936. >>>
  3937. # Apply UnsharpMask with custom parameters
  3938. >>> transform = A.UnsharpMask(
  3939. ... blur_limit=(3, 7),
  3940. ... sigma_limit=(0.1, 0.5),
  3941. ... alpha=(0.2, 0.7),
  3942. ... threshold=15,
  3943. ... p=1.0
  3944. ... )
  3945. >>> sharpened_image = transform(image=image)['image']
  3946. """
  3947. class InitSchema(BaseTransformInitSchema):
  3948. sigma_limit: NonNegativeFloatRangeType
  3949. alpha: ZeroOneRangeType
  3950. threshold: int = Field(ge=0, le=255)
  3951. blur_limit: tuple[int, int] | int
  3952. @field_validator("blur_limit")
  3953. @classmethod
  3954. def _process_blur(
  3955. cls,
  3956. value: tuple[int, int] | int,
  3957. info: ValidationInfo,
  3958. ) -> tuple[int, int]:
  3959. return fblur.process_blur_limit(value, info, min_value=3)
  3960. def __init__(
  3961. self,
  3962. blur_limit: tuple[int, int] | int = (3, 7),
  3963. sigma_limit: tuple[float, float] | float = 0.0,
  3964. alpha: tuple[float, float] | float = (0.2, 0.5),
  3965. threshold: int = 10,
  3966. p: float = 0.5,
  3967. ):
  3968. super().__init__(p=p)
  3969. self.blur_limit = cast("tuple[int, int]", blur_limit)
  3970. self.sigma_limit = cast("tuple[float, float]", sigma_limit)
  3971. self.alpha = cast("tuple[float, float]", alpha)
  3972. self.threshold = threshold
  3973. def get_params_dependent_on_data(
  3974. self,
  3975. params: dict[str, Any],
  3976. data: dict[str, Any],
  3977. ) -> dict[str, Any]:
  3978. """Generate parameters for the UnsharpMask transform.
  3979. Returns:
  3980. dict[str, Any]: The parameters of the transform.
  3981. """
  3982. return {
  3983. "ksize": self.py_random.randrange(
  3984. self.blur_limit[0],
  3985. self.blur_limit[1] + 1,
  3986. 2,
  3987. ),
  3988. "sigma": self.py_random.uniform(*self.sigma_limit),
  3989. "alpha": self.py_random.uniform(*self.alpha),
  3990. }
  3991. def apply(
  3992. self,
  3993. img: np.ndarray,
  3994. ksize: int,
  3995. sigma: int,
  3996. alpha: float,
  3997. **params: Any,
  3998. ) -> np.ndarray:
  3999. """Apply the UnsharpMask transform to the input image.
  4000. Args:
  4001. img (np.ndarray): The input image to apply the UnsharpMask transform to.
  4002. ksize (int): The kernel size for the convolution.
  4003. sigma (int): The standard deviation for the Gaussian blur.
  4004. alpha (float): The visibility of the sharpened image.
  4005. **params (Any): Additional parameters (not used in this transform).
  4006. Returns:
  4007. np.ndarray: The image with the applied UnsharpMask transform.
  4008. """
  4009. return fpixel.unsharp_mask(
  4010. img,
  4011. ksize,
  4012. sigma=sigma,
  4013. alpha=alpha,
  4014. threshold=self.threshold,
  4015. )
  4016. class Spatter(ImageOnlyTransform):
  4017. """Apply spatter transform. It simulates corruption which can occlude a lens in the form of rain or mud.
  4018. Args:
  4019. mean (tuple[float, float] | float): Mean value of normal distribution for generating liquid layer.
  4020. If single float mean will be sampled from `(0, mean)`
  4021. If tuple of float mean will be sampled from range `(mean[0], mean[1])`.
  4022. If you want constant value use (mean, mean).
  4023. Default (0.65, 0.65)
  4024. std (tuple[float, float] | float): Standard deviation value of normal distribution for generating liquid layer.
  4025. If single float the number will be sampled from `(0, std)`.
  4026. If tuple of float std will be sampled from range `(std[0], std[1])`.
  4027. If you want constant value use (std, std).
  4028. Default: (0.3, 0.3).
  4029. gauss_sigma (tuple[float, float] | floats): Sigma value for gaussian filtering of liquid layer.
  4030. If single float the number will be sampled from `(0, gauss_sigma)`.
  4031. If tuple of float gauss_sigma will be sampled from range `(gauss_sigma[0], gauss_sigma[1])`.
  4032. If you want constant value use (gauss_sigma, gauss_sigma).
  4033. Default: (2, 3).
  4034. cutout_threshold (tuple[float, float] | floats): Threshold for filtering liquid layer
  4035. (determines number of drops). If single float it will used as cutout_threshold.
  4036. If single float the number will be sampled from `(0, cutout_threshold)`.
  4037. If tuple of float cutout_threshold will be sampled from range `(cutout_threshold[0], cutout_threshold[1])`.
  4038. If you want constant value use `(cutout_threshold, cutout_threshold)`.
  4039. Default: (0.68, 0.68).
  4040. intensity (tuple[float, float] | floats): Intensity of corruption.
  4041. If single float the number will be sampled from `(0, intensity)`.
  4042. If tuple of float intensity will be sampled from range `(intensity[0], intensity[1])`.
  4043. If you want constant value use `(intensity, intensity)`.
  4044. Default: (0.6, 0.6).
  4045. mode (Literal["rain", "mud"]): Type of corruption. Default: "rain".
  4046. color (tuple[int, ...] | None): Corruption elements color.
  4047. If list uses provided list as color for the effect.
  4048. If None uses default colors based on mode (rain: (238, 238, 175), mud: (20, 42, 63)).
  4049. p (float): probability of applying the transform. Default: 0.5.
  4050. Targets:
  4051. image
  4052. Image types:
  4053. uint8, float32
  4054. References:
  4055. Benchmarking Neural Network Robustness to Common Corruptions and Perturbations: https://arxiv.org/abs/1903.12261
  4056. Examples:
  4057. >>> import numpy as np
  4058. >>> import albumentations as A
  4059. >>> import cv2
  4060. >>>
  4061. >>> # Create a sample image
  4062. >>> image = np.ones((300, 300, 3), dtype=np.uint8) * 200 # Light gray background
  4063. >>> # Add some gradient to make effects more visible
  4064. >>> for i in range(300):
  4065. ... image[i, :, :] = np.clip(image[i, :, :] - i // 3, 0, 255)
  4066. >>>
  4067. >>> # Example 1: Rain effect with default parameters
  4068. >>> rain_transform = A.Spatter(
  4069. ... mode="rain",
  4070. ... p=1.0
  4071. ... )
  4072. >>> rain_result = rain_transform(image=image)
  4073. >>> rain_image = rain_result['image'] # Image with rain drops
  4074. >>>
  4075. >>> # Example 2: Heavy rain with custom parameters
  4076. >>> heavy_rain = A.Spatter(
  4077. ... mode="rain",
  4078. ... mean=(0.7, 0.7), # Higher mean = more coverage
  4079. ... std=(0.2, 0.2), # Lower std = more uniform effect
  4080. ... cutout_threshold=(0.65, 0.65), # Lower threshold = more drops
  4081. ... intensity=(0.8, 0.8), # Higher intensity = more visible effect
  4082. ... color=(200, 200, 255), # Blueish rain drops
  4083. ... p=1.0
  4084. ... )
  4085. >>> heavy_rain_result = heavy_rain(image=image)
  4086. >>> heavy_rain_image = heavy_rain_result['image']
  4087. >>>
  4088. >>> # Example 3: Mud effect
  4089. >>> mud_transform = A.Spatter(
  4090. ... mode="mud",
  4091. ... mean=(0.6, 0.6),
  4092. ... std=(0.3, 0.3),
  4093. ... cutout_threshold=(0.62, 0.62),
  4094. ... intensity=(0.7, 0.7),
  4095. ... p=1.0
  4096. ... )
  4097. >>> mud_result = mud_transform(image=image)
  4098. >>> mud_image = mud_result['image'] # Image with mud splatters
  4099. >>>
  4100. >>> # Example 4: Custom colored mud
  4101. >>> red_mud = A.Spatter(
  4102. ... mode="mud",
  4103. ... mean=(0.55, 0.55),
  4104. ... std=(0.25, 0.25),
  4105. ... cutout_threshold=(0.7, 0.7),
  4106. ... intensity=(0.6, 0.6),
  4107. ... color=(120, 40, 40), # Reddish-brown mud
  4108. ... p=1.0
  4109. ... )
  4110. >>> red_mud_result = red_mud(image=image)
  4111. >>> red_mud_image = red_mud_result['image']
  4112. >>>
  4113. >>> # Example 5: Random effect (50% chance of applying)
  4114. >>> random_spatter = A.Compose([
  4115. ... A.Spatter(
  4116. ... mode="rain" if np.random.random() < 0.5 else "mud",
  4117. ... p=0.5
  4118. ... )
  4119. ... ])
  4120. >>> random_result = random_spatter(image=image)
  4121. >>> result_image = random_result['image'] # May or may not have spatter effect
  4122. """
  4123. class InitSchema(BaseTransformInitSchema):
  4124. mean: ZeroOneRangeType
  4125. std: ZeroOneRangeType
  4126. gauss_sigma: NonNegativeFloatRangeType
  4127. cutout_threshold: ZeroOneRangeType
  4128. intensity: ZeroOneRangeType
  4129. mode: Literal["rain", "mud"]
  4130. color: Sequence[int] | None
  4131. @model_validator(mode="after")
  4132. def _check_color(self) -> Self:
  4133. # Default colors for each mode
  4134. default_colors = {"rain": [238, 238, 175], "mud": [20, 42, 63]}
  4135. if self.color is None:
  4136. # Use default color for the selected mode
  4137. self.color = default_colors[self.mode]
  4138. # Validate the provided color
  4139. elif len(self.color) != NUM_RGB_CHANNELS:
  4140. msg = "Color must be a list of three integers for RGB format."
  4141. raise ValueError(msg)
  4142. return self
  4143. def __init__(
  4144. self,
  4145. mean: tuple[float, float] | float = (0.65, 0.65),
  4146. std: tuple[float, float] | float = (0.3, 0.3),
  4147. gauss_sigma: tuple[float, float] | float = (2, 2),
  4148. cutout_threshold: tuple[float, float] | float = (0.68, 0.68),
  4149. intensity: tuple[float, float] | float = (0.6, 0.6),
  4150. mode: Literal["rain", "mud"] = "rain",
  4151. color: tuple[int, ...] | None = None,
  4152. p: float = 0.5,
  4153. ):
  4154. super().__init__(p=p)
  4155. self.mean = cast("tuple[float, float]", mean)
  4156. self.std = cast("tuple[float, float]", std)
  4157. self.gauss_sigma = cast("tuple[float, float]", gauss_sigma)
  4158. self.cutout_threshold = cast("tuple[float, float]", cutout_threshold)
  4159. self.intensity = cast("tuple[float, float]", intensity)
  4160. self.mode = mode
  4161. self.color = cast("tuple[int, ...]", color)
  4162. def apply(
  4163. self,
  4164. img: np.ndarray,
  4165. **params: dict[str, Any],
  4166. ) -> np.ndarray:
  4167. """Apply the Spatter transform to the input image.
  4168. Args:
  4169. img (np.ndarray): The input image to apply the Spatter transform to.
  4170. **params (dict[str, Any]): Additional parameters (not used in this transform).
  4171. Returns:
  4172. np.ndarray: The image with the applied Spatter transform.
  4173. """
  4174. non_rgb_error(img)
  4175. if params["mode"] == "rain":
  4176. return fpixel.spatter_rain(img, params["drops"])
  4177. return fpixel.spatter_mud(img, params["non_mud"], params["mud"])
  4178. def get_params_dependent_on_data(
  4179. self,
  4180. params: dict[str, Any],
  4181. data: dict[str, Any],
  4182. ) -> dict[str, Any]:
  4183. """Generate parameters for the Spatter transform.
  4184. Returns:
  4185. dict[str, Any]: The parameters of the transform.
  4186. """
  4187. height, width = params["shape"][:2]
  4188. mean = self.py_random.uniform(*self.mean)
  4189. std = self.py_random.uniform(*self.std)
  4190. cutout_threshold = self.py_random.uniform(*self.cutout_threshold)
  4191. sigma = self.py_random.uniform(*self.gauss_sigma)
  4192. mode = self.mode
  4193. intensity = self.py_random.uniform(*self.intensity)
  4194. color = np.array(self.color) / 255.0
  4195. liquid_layer = self.random_generator.normal(
  4196. size=(height, width),
  4197. loc=mean,
  4198. scale=std,
  4199. )
  4200. # Convert sigma to kernel size (must be odd)
  4201. ksize = int(2 * round(3 * sigma) + 1) # 3 sigma rule, rounded to nearest odd
  4202. cv2.GaussianBlur(
  4203. src=liquid_layer,
  4204. dst=liquid_layer, # in-place operation
  4205. ksize=(ksize, ksize),
  4206. sigmaX=sigma,
  4207. sigmaY=sigma,
  4208. borderType=cv2.BORDER_REPLICATE,
  4209. )
  4210. # Important line, without it the rain effect looses drops
  4211. liquid_layer[liquid_layer < cutout_threshold] = 0
  4212. if mode == "rain":
  4213. return {
  4214. "mode": "rain",
  4215. **fpixel.get_rain_params(liquid_layer=liquid_layer, color=color, intensity=intensity),
  4216. }
  4217. return {
  4218. "mode": "mud",
  4219. **fpixel.get_mud_params(
  4220. liquid_layer=liquid_layer,
  4221. color=color,
  4222. cutout_threshold=cutout_threshold,
  4223. sigma=sigma,
  4224. intensity=intensity,
  4225. random_generator=self.random_generator,
  4226. ),
  4227. }
  4228. class ChromaticAberration(ImageOnlyTransform):
  4229. """Add lateral chromatic aberration by distorting the red and blue channels of the input image.
  4230. Chromatic aberration is an optical effect that occurs when a lens fails to focus all colors to the same point.
  4231. This transform simulates this effect by applying different radial distortions to the red and blue channels
  4232. of the image, while leaving the green channel unchanged.
  4233. Args:
  4234. primary_distortion_limit (tuple[float, float] | float): Range of the primary radial distortion coefficient.
  4235. If a single float value is provided, the range
  4236. will be (-primary_distortion_limit, primary_distortion_limit).
  4237. This parameter controls the distortion in the center of the image:
  4238. - Positive values result in pincushion distortion (edges bend inward)
  4239. - Negative values result in barrel distortion (edges bend outward)
  4240. Default: (-0.02, 0.02).
  4241. secondary_distortion_limit (tuple[float, float] | float): Range of the secondary radial distortion coefficient.
  4242. If a single float value is provided, the range
  4243. will be (-secondary_distortion_limit, secondary_distortion_limit).
  4244. This parameter controls the distortion in the corners of the image:
  4245. - Positive values enhance pincushion distortion
  4246. - Negative values enhance barrel distortion
  4247. Default: (-0.05, 0.05).
  4248. mode (Literal["green_purple", "red_blue", "random"]): Type of color fringing to apply. Options are:
  4249. - 'green_purple': Distorts red and blue channels in opposite directions, creating green-purple fringing.
  4250. - 'red_blue': Distorts red and blue channels in the same direction, creating red-blue fringing.
  4251. - 'random': Randomly chooses between 'green_purple' and 'red_blue' modes for each application.
  4252. Default: 'green_purple'.
  4253. interpolation (InterpolationType): Flag specifying the interpolation algorithm. Should be one of:
  4254. cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
  4255. Default: cv2.INTER_LINEAR.
  4256. p (float): Probability of applying the transform. Should be in the range [0, 1].
  4257. Default: 0.5.
  4258. Targets:
  4259. image
  4260. Image types:
  4261. uint8, float32
  4262. Number of channels:
  4263. 3
  4264. Note:
  4265. - This transform only affects RGB images. Grayscale images will raise an error.
  4266. - The strength of the effect depends on both primary and secondary distortion limits.
  4267. - Higher absolute values for distortion limits will result in more pronounced chromatic aberration.
  4268. - The 'green_purple' mode tends to produce more noticeable effects than 'red_blue'.
  4269. Examples:
  4270. >>> import albumentations as A
  4271. >>> import cv2
  4272. >>> transform = A.ChromaticAberration(
  4273. ... primary_distortion_limit=0.05,
  4274. ... secondary_distortion_limit=0.1,
  4275. ... mode='green_purple',
  4276. ... interpolation=cv2.INTER_LINEAR,
  4277. ... p=1.0
  4278. ... )
  4279. >>> transformed = transform(image=image)
  4280. >>> aberrated_image = transformed['image']
  4281. References:
  4282. Chromatic Aberration: https://en.wikipedia.org/wiki/Chromatic_aberration
  4283. """
  class InitSchema(BaseTransformInitSchema):
      # Validation schema for the ChromaticAberration constructor arguments.

      # Ranges of the two radial distortion coefficients. The class docstring
      # states that a single float x is interpreted as the range (-x, x);
      # presumably SymmetricRangeType performs that expansion (defined elsewhere).
      primary_distortion_limit: SymmetricRangeType
      secondary_distortion_limit: SymmetricRangeType

      # Colour-fringing mode.
      mode: Literal["green_purple", "red_blue", "random"]

      # OpenCV interpolation flags accepted by the transform.
      interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ]
  def __init__(
      self,
      primary_distortion_limit: tuple[float, float] | float = (-0.02, 0.02),
      secondary_distortion_limit: tuple[float, float] | float = (-0.05, 0.05),
      mode: Literal["green_purple", "red_blue", "random"] = "green_purple",
      interpolation: Literal[
          cv2.INTER_NEAREST,
          cv2.INTER_NEAREST_EXACT,
          cv2.INTER_LINEAR,
          cv2.INTER_CUBIC,
          cv2.INTER_AREA,
          cv2.INTER_LANCZOS4,
          cv2.INTER_LINEAR_EXACT,
      ] = cv2.INTER_LINEAR,
      p: float = 0.5,
  ):
      """Store the configuration of the transform.

      Args:
          primary_distortion_limit (tuple[float, float] | float): Range of the primary
              radial distortion coefficient.
          secondary_distortion_limit (tuple[float, float] | float): Range of the secondary
              radial distortion coefficient.
          mode (Literal["green_purple", "red_blue", "random"]): Type of color fringing.
          interpolation: OpenCV interpolation flag handed to the distortion routine.
          p (float): Probability of applying the transform.

      """
      super().__init__(p=p)

      self.mode = mode
      self.interpolation = interpolation

      # cast() only informs the type checker and converts nothing; the limits are
      # presumably already (min, max) tuples after InitSchema validation -- TODO confirm.
      self.secondary_distortion_limit = cast(
          "tuple[float, float]",
          secondary_distortion_limit,
      )
      self.primary_distortion_limit = cast(
          "tuple[float, float]",
          primary_distortion_limit,
      )
  def apply(
      self,
      img: np.ndarray,
      primary_distortion_red: float,
      secondary_distortion_red: float,
      primary_distortion_blue: float,
      secondary_distortion_blue: float,
      **params: Any,
  ) -> np.ndarray:
      """Run chromatic aberration on a single image.

      Args:
          img (np.ndarray): Input image; it is first passed to ``non_rgb_error``.
          primary_distortion_red (float): Primary distortion coefficient for the red channel.
          secondary_distortion_red (float): Secondary distortion coefficient for the red channel.
          primary_distortion_blue (float): Primary distortion coefficient for the blue channel.
          secondary_distortion_blue (float): Secondary distortion coefficient for the blue channel.
          **params (Any): Extra parameters; ignored by this method.

      Returns:
          np.ndarray: Result of ``fpixel.chromatic_aberration`` for the given coefficients.

      """
      # The class docstring notes that non-RGB (e.g. grayscale) input raises here.
      non_rgb_error(img)

      red_coefficients = (primary_distortion_red, secondary_distortion_red)
      blue_coefficients = (primary_distortion_blue, secondary_distortion_blue)
      return fpixel.chromatic_aberration(
          img,
          *red_coefficients,
          *blue_coefficients,
          self.interpolation,
      )
  def get_params(self) -> dict[str, float]:
      """Sample the distortion coefficients for the red and blue channels.

      Returns:
          dict[str, float]: The primary and secondary coefficients for red and blue,
              keyed by the argument names expected by ``apply``.

      """
      # Draw order matters for reproducibility: red primary, red secondary,
      # blue primary, blue secondary.
      red_primary = self.py_random.uniform(*self.primary_distortion_limit)
      red_secondary = self.py_random.uniform(*self.secondary_distortion_limit)
      blue_primary = self.py_random.uniform(*self.primary_distortion_limit)
      blue_secondary = self.py_random.uniform(*self.secondary_distortion_limit)

      # Within one channel the secondary coefficient follows the sign of the primary one.
      red_secondary = self._match_sign(red_primary, red_secondary)
      blue_secondary = self._match_sign(blue_primary, blue_secondary)

      # "green_purple": blue takes the same sign as red.
      # "red_blue": blue takes the opposite sign to red.
      # Any other mode ("random") leaves the independently drawn signs untouched.
      align_blue_to_red = {
          "green_purple": self._match_sign,
          "red_blue": self._unmatch_sign,
      }.get(self.mode)
      if align_blue_to_red is not None:
          blue_primary = align_blue_to_red(red_primary, blue_primary)
          blue_secondary = align_blue_to_red(red_secondary, blue_secondary)

      return {
          "primary_distortion_red": red_primary,
          "secondary_distortion_red": red_secondary,
          "primary_distortion_blue": blue_primary,
          "secondary_distortion_blue": blue_secondary,
      }
  4400. @staticmethod
  4401. def _match_sign(a: float, b: float) -> float:
  4402. # Match the sign of b to a
  4403. if (a < 0 < b) or (a > 0 > b):
  4404. return -b
  4405. return b
  4406. @staticmethod
  4407. def _unmatch_sign(a: float, b: float) -> float:
  4408. # Unmatch the sign of b to a
  4409. if (a < 0 and b < 0) or (a > 0 and b > 0):
  4410. return -b
  4411. return b
  # Constants shared by PlanckianJitter. The temperature bounds are derived from the
  # keys of the coefficient tables in fpixel.PLANCKIAN_COEFFS (iterating a mapping
  # yields its keys, so no explicit .keys() call is needed).
  PLANKIAN_JITTER_CONST = {
      # Largest temperature key found in either coefficient table.
      "MAX_TEMP": max(
          *fpixel.PLANCKIAN_COEFFS["blackbody"],
          *fpixel.PLANCKIAN_COEFFS["cied"],
      ),
      # Smallest temperature key available for each mode.
      "MIN_BLACKBODY_TEMP": min(fpixel.PLANCKIAN_COEFFS["blackbody"]),
      "MIN_CIED_TEMP": min(fpixel.PLANCKIAN_COEFFS["cied"]),
      # Pivot temperature used when sampling; must lie inside temperature_limit.
      "WHITE_TEMP": 6_000,
      # Probability of sampling from the side below WHITE_TEMP.
      "SAMPLING_TEMP_PROB": 0.4,
  }
  class PlanckianJitter(ImageOnlyTransform):
      """Applies Planckian Jitter to the input image, simulating color temperature variations in illumination.

      This transform adjusts the color of an image to mimic the effect of different color temperatures
      of light sources, based on Planck's law of black body radiation. It can simulate the appearance
      of an image under various lighting conditions, from warm (reddish) to cool (bluish) color casts.

      PlanckianJitter vs. ColorJitter:
      PlanckianJitter is fundamentally different from ColorJitter in its approach and use cases:
      1. Physics-based: PlanckianJitter is grounded in the physics of light, simulating real-world
         color temperature changes. ColorJitter applies arbitrary color adjustments.
      2. Natural effects: This transform produces color shifts that correspond to natural lighting
         variations, making it ideal for outdoor scene simulation or color constancy problems.
      3. Single parameter: Color changes are controlled by a single, physically meaningful parameter
         (color temperature), unlike ColorJitter's multiple abstract parameters.
      4. Correlated changes: Color shifts are correlated across channels in a way that mimics natural
         light, whereas ColorJitter can make independent channel adjustments.

      When to use PlanckianJitter:
      - Simulating different times of day or lighting conditions in outdoor scenes
      - Augmenting data for computer vision tasks that need to be robust to natural lighting changes
      - Preparing synthetic data to better match real-world lighting variations
      - Color constancy research or applications
      - When you need physically plausible color variations rather than arbitrary color changes

      The logic behind PlanckianJitter:
      As the color temperature increases:
      1. Lower temperatures (around 3000K) produce warm, reddish tones, simulating sunset or incandescent lighting.
      2. Mid-range temperatures (around 5500K) correspond to daylight.
      3. Higher temperatures (above 7000K) result in cool, bluish tones, similar to overcast sky or shade.
      This progression mimics the natural variation of sunlight throughout the day and in different weather conditions.

      Args:
          mode (Literal["blackbody", "cied"]): The mode of the transformation.
              - "blackbody": Simulates blackbody radiation color changes.
              - "cied": Uses the CIE D illuminant series for color temperature simulation.
              Default: "blackbody"

          temperature_limit (tuple[int, int] | None): The range of color temperatures (in Kelvin) to sample from.
              - For "blackbody" mode: Should be within [3000K, 15000K]. Default: (3000, 15000)
              - For "cied" mode: Should be within [4000K, 15000K]. Default: (4000, 15000)
              If None, the default ranges will be used based on the selected mode.
              The range must contain the white temperature (6000K); otherwise a ValueError is raised.
              Higher temperatures produce cooler (bluish) images, lower temperatures produce warmer (reddish) images.

          sampling_method (Literal["uniform", "gaussian"]): Method to sample the temperature.
              - "uniform": With probability 0.4 samples uniformly from [min, 6000K], otherwise
                uniformly from [6000K, max], so that cold temperatures are not selected too often.
              - "gaussian": Samples from an asymmetric Gaussian centered at 6000K whose standard
                deviation on each side is one third of the distance to the corresponding limit.
              Default: "uniform"

          p (float): Probability of applying the transform. Default: 0.5

      Targets:
          image

      Image types:
          uint8, float32

      Number of channels:
          3

      Note:
          - The transform preserves the overall brightness of the image while shifting its color.
          - The "blackbody" mode provides a wider range of color shifts, especially in the lower (warmer) temperatures.
          - The "cied" mode is based on standard illuminants and may provide more realistic daylight variations.
          - The Gaussian sampling method tends to produce more subtle variations, as it's centered around daylight.
          - Unlike ColorJitter, this transform ensures that color changes are physically plausible and correlated
            across channels, maintaining the natural appearance of the scene under different lighting conditions.

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
          >>> transform = A.PlanckianJitter(mode="blackbody",
          ...                               temperature_limit=(3000, 9000),
          ...                               sampling_method="uniform",
          ...                               p=1.0)
          >>> result = transform(image=image)
          >>> jittered_image = result["image"]

      References:
          - Planck's law: https://en.wikipedia.org/wiki/Planck%27s_law
          - CIE Standard Illuminants: https://en.wikipedia.org/wiki/Standard_illuminant
          - Color temperature: https://en.wikipedia.org/wiki/Color_temperature
          - Implementation inspired by: https://github.com/TheZino/PlanckianJitter

      """

      class InitSchema(BaseTransformInitSchema):
          mode: Literal["blackbody", "cied"]
          temperature_limit: Annotated[tuple[int, int], AfterValidator(nondecreasing)] | None
          sampling_method: Literal["uniform", "gaussian"]

          @model_validator(mode="after")
          def _validate_temperature(self) -> Self:
              """Fill in the per-mode default range and check that the range is usable.

              Raises:
                  ValueError: If an explicit range leaves the bounds supported by the
                      selected mode, or if the final range does not contain WHITE_TEMP.

              """
              max_temp = int(PLANKIAN_JITTER_CONST["MAX_TEMP"])

              if self.temperature_limit is None:
                  # No explicit range: use the full range available for the mode.
                  if self.mode == "blackbody":
                      self.temperature_limit = (
                          int(PLANKIAN_JITTER_CONST["MIN_BLACKBODY_TEMP"]),
                          max_temp,
                      )
                  elif self.mode == "cied":
                      self.temperature_limit = (
                          int(PLANKIAN_JITTER_CONST["MIN_CIED_TEMP"]),
                          max_temp,
                      )
              else:
                  if self.mode == "blackbody" and (
                      min(self.temperature_limit) < PLANKIAN_JITTER_CONST["MIN_BLACKBODY_TEMP"]
                      or max(self.temperature_limit) > max_temp
                  ):
                      raise ValueError(
                          "Temperature limits for blackbody should be in [3000, 15000] range",
                      )
                  if self.mode == "cied" and (
                      min(self.temperature_limit) < PLANKIAN_JITTER_CONST["MIN_CIED_TEMP"]
                      or max(self.temperature_limit) > max_temp
                  ):
                      raise ValueError(
                          "Temperature limits for CIED should be in [4000, 15000] range",
                      )

                  # get_params samples on either side of WHITE_TEMP, so it has to lie in range.
                  if not self.temperature_limit[0] <= PLANKIAN_JITTER_CONST["WHITE_TEMP"] <= self.temperature_limit[1]:
                      raise ValueError(
                          "White temperature should be within the temperature limits",
                      )

              return self

      def __init__(
          self,
          mode: Literal["blackbody", "cied"] = "blackbody",
          temperature_limit: tuple[int, int] | None = None,
          sampling_method: Literal["uniform", "gaussian"] = "uniform",
          p: float = 0.5,
      ) -> None:
          super().__init__(p=p)

          self.mode = mode
          # cast() is for the type checker only; the None default is presumably replaced
          # by InitSchema validation before this point -- TODO confirm.
          self.temperature_limit = cast("tuple[int, int]", temperature_limit)
          self.sampling_method = sampling_method

      def apply(self, img: np.ndarray, temperature: int, **params: Any) -> np.ndarray:
          """Apply the PlanckianJitter transform to the input image.

          Args:
              img (np.ndarray): The input image to apply the PlanckianJitter transform to.
              temperature (int): The temperature to apply to the image.
              **params (Any): Additional parameters for the transform.

          Returns:
              np.ndarray: The result of ``fpixel.planckian_jitter`` for the given temperature and mode.

          """
          non_rgb_error(img)
          return fpixel.planckian_jitter(img, temperature, mode=self.mode)

      def get_params(self) -> dict[str, Any]:
          """Generate parameters for the PlanckianJitter transform.

          Returns:
              dict[str, Any]: A dict with a single ``"temperature"`` entry: the sampled
                  temperature, clipped to ``temperature_limit`` and truncated to int.

          Raises:
              ValueError: If ``sampling_method`` is neither "uniform" nor "gaussian".

          """
          sampling_prob_boundary = PLANKIAN_JITTER_CONST["SAMPLING_TEMP_PROB"]
          sampling_temp_boundary = PLANKIAN_JITTER_CONST["WHITE_TEMP"]

          if self.sampling_method == "uniform":
              # Split into 2 cases to avoid selecting cold temperatures (>6000) too often
              if self.py_random.random() < sampling_prob_boundary:
                  temperature = self.py_random.uniform(
                      self.temperature_limit[0],
                      sampling_temp_boundary,
                  )
              else:
                  temperature = self.py_random.uniform(
                      sampling_temp_boundary,
                      self.temperature_limit[1],
                  )
          elif self.sampling_method == "gaussian":
              # Sample values from asymmetric gaussian distribution: each side uses a
              # standard deviation of one third of the distance from the white
              # temperature to that side's limit.
              if self.py_random.random() < sampling_prob_boundary:
                  # Left side
                  shift = np.abs(
                      self.py_random.gauss(
                          0,
                          np.abs(sampling_temp_boundary - self.temperature_limit[0]) / 3,
                      ),
                  )
                  temperature = sampling_temp_boundary - shift
              else:
                  # Right side
                  shift = np.abs(
                      self.py_random.gauss(
                          0,
                          np.abs(self.temperature_limit[1] - sampling_temp_boundary) / 3,
                      ),
                  )
                  temperature = sampling_temp_boundary + shift
          else:
              raise ValueError(f"Unknown sampling method: {self.sampling_method}")

          # Ensure temperature is within the valid range (Gaussian tails can overshoot)
          temperature = np.clip(
              temperature,
              self.temperature_limit[0],
              self.temperature_limit[1],
          )

          return {"temperature": int(temperature)}
  class ShotNoise(ImageOnlyTransform):
      """Apply shot noise to the image by modeling photon counting as a Poisson process.

      Shot noise (also known as Poisson noise) occurs in imaging due to the quantum nature of light.
      When photons hit an imaging sensor, they arrive at random times following Poisson statistics.
      This transform simulates this physical process in linear light space by:
      1. Converting to linear space (removing gamma)
      2. Treating each pixel value as an expected photon count
      3. Sampling actual photon counts from a Poisson distribution
      4. Converting back to display space (reapplying gamma)

      The noise characteristics follow real camera behavior:
      - Noise variance equals signal mean in linear space (Poisson statistics)
      - Brighter regions have more absolute noise but less relative noise
      - Darker regions have less absolute noise but more relative noise
      - Noise is generated independently for each pixel and color channel

      Args:
          scale_range (tuple[float, float]): Range for sampling the noise scale factor.
              Represents the reciprocal of the expected photon count per unit intensity.
              Higher values mean more noise:
              - scale = 0.1: ~100 photons per unit intensity (low noise)
              - scale = 1.0: ~1 photon per unit intensity (moderate noise)
              - scale = 10.0: ~0.1 photons per unit intensity (high noise)
              Default: (0.1, 0.3)
          p (float): Probability of applying the transform. Default: 0.5

      Targets:
          image

      Image types:
          uint8, float32

      Note:
          - Performs calculations in linear light space (gamma = 2.2)
          - Preserves the image's mean intensity
          - Memory efficient with in-place operations
          - Thread-safe with independent random seeds

      Examples:
          >>> import numpy as np
          >>> import albumentations as A
          >>> # Generate synthetic image
          >>> image = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
          >>> # Apply moderate shot noise
          >>> transform = A.ShotNoise(scale_range=(0.1, 1.0), p=1.0)
          >>> noisy_image = transform(image=image)["image"]

      References:
          - Shot noise: https://en.wikipedia.org/wiki/Shot_noise
          - Original paper: https://doi.org/10.1002/andp.19183622304 (Schottky, 1918)
          - Poisson process: https://en.wikipedia.org/wiki/Poisson_point_process
          - Gamma correction: https://en.wikipedia.org/wiki/Gamma_correction

      """

      class InitSchema(BaseTransformInitSchema):
          # The pair must pass the nondecreasing validator and the lower-bound-0 range check.
          scale_range: Annotated[
              tuple[float, float],
              AfterValidator(nondecreasing),
              AfterValidator(check_range_bounds(0, None)),
          ]

      def __init__(
          self,
          scale_range: tuple[float, float] = (0.1, 0.3),
          p: float = 0.5,
      ):
          super().__init__(p=p)
          self.scale_range = scale_range

      def apply(
          self,
          img: np.ndarray,
          scale: float,
          random_seed: int,
          **params: Any,
      ) -> np.ndarray:
          """Add shot noise to one image.

          Args:
              img (np.ndarray): The image to add noise to.
              scale (float): The noise scale factor sampled in ``get_params``.
              random_seed (int): Seed for the NumPy generator created for this call.
              **params (Any): Extra parameters; ignored by this method.

          Returns:
              np.ndarray: The output of ``fpixel.shot_noise``.

          """
          # A fresh generator per call, seeded with the value drawn in get_params.
          noise_rng = np.random.default_rng(random_seed)
          return fpixel.shot_noise(img, scale, noise_rng)

      def get_params(self) -> dict[str, Any]:
          """Sample the noise scale and a seed for the noise generator.

          Returns:
              dict[str, Any]: ``"scale"`` drawn uniformly from ``scale_range`` and
                  ``"random_seed"`` drawn from the transform's NumPy generator.

          """
          sampled_scale = self.py_random.uniform(*self.scale_range)
          seed = self.random_generator.integers(0, 2**32 - 1)
          return {"scale": sampled_scale, "random_seed": seed}
  class NoiseParamsBase(BaseModel):
      """Base class for all noise parameter models."""

      # Unknown keys are rejected rather than silently ignored.
      model_config = ConfigDict(extra="forbid")

      # Tag naming the distribution; each subclass pins it to one literal value.
      noise_type: str
  class UniformParams(NoiseParamsBase):
      # Parameters for uniform noise: a non-empty list of (min, max) ranges.
      noise_type: Literal["uniform"] = "uniform"
      ranges: list[Sequence[float]] = Field(min_length=1)

      @field_validator("ranges", mode="after")
      @classmethod
      def validate_ranges(cls, v: list[Sequence[float]]) -> list[tuple[float, float]]:
          """Check every range and return them as ``(float, float)`` tuples.

          Raises:
              ValueError: If a range does not have PAIR elements, or if its values are
                  not ordered and inside [-1, 1].

          """
          normalized: list[tuple[float, float]] = []
          for bounds in v:
              if len(bounds) != PAIR:
                  raise ValueError("Each range must have exactly 2 values")

              low, high = bounds
              ordered_and_bounded = -1 <= low <= high <= 1
              if not ordered_and_bounded:
                  raise ValueError("Range values must be in [-1, 1] and min <= max")

              normalized.append((float(low), float(high)))
          return normalized
  class GaussianParams(NoiseParamsBase):
      # Parameters for Gaussian noise.
      noise_type: Literal["gaussian"] = "gaussian"

      # Range the mean is sampled from; bounds checked against [-1, 1].
      mean_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=-1, max_val=1)),
      ]

      # Range the standard deviation is sampled from; bounds checked against [0, 1].
      std_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=0, max_val=1)),
      ]
  class LaplaceParams(NoiseParamsBase):
      # Parameters for Laplace noise.
      noise_type: Literal["laplace"] = "laplace"

      # Range the location parameter is sampled from; bounds checked against [-1, 1].
      mean_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=-1, max_val=1)),
      ]

      # Range the scale parameter is sampled from; bounds checked against [0, 1].
      scale_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=0, max_val=1)),
      ]
  class BetaParams(NoiseParamsBase):
      # Parameters for Beta-distributed noise.
      noise_type: Literal["beta"] = "beta"

      # Ranges for the two shape parameters; only a lower bound of 0 is checked.
      # NOTE(review): the AdditiveNoise docstring describes these as lying in (0, inf);
      # whether check_range_bounds(min_val=0) rejects 0 itself is not visible here -- confirm.
      alpha_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=0)),
      ]
      beta_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=0)),
      ]

      # Range the output scale is sampled from; bounds checked against [0, 1].
      scale_range: Annotated[
          Sequence[float],
          AfterValidator(check_range_bounds(min_val=0, max_val=1)),
      ]
  # Tagged union of all noise parameter models; the "noise_type" field selects
  # which concrete model a given payload is validated against.
  NoiseParams = Annotated[
      Union[
          UniformParams,
          GaussianParams,
          LaplaceParams,
          BetaParams,
      ],
      Field(discriminator="noise_type"),
  ]
  class AdditiveNoise(ImageOnlyTransform):
      """Apply random noise to image channels using various noise distributions.

      This transform generates noise using different probability distributions and applies it
      to image channels. The noise can be generated in three spatial modes and supports
      multiple noise distributions, each with configurable parameters.

      Args:
          noise_type(Literal["uniform", "gaussian", "laplace", "beta"]): Type of noise distribution to use. Options:
              - "uniform": Uniform distribution, good for simple random perturbations
              - "gaussian": Normal distribution, models natural random processes
              - "laplace": Similar to Gaussian but with heavier tails, good for outliers
              - "beta": Flexible bounded distribution, can be symmetric or skewed
              Default: "uniform"

          spatial_mode(Literal["constant", "per_pixel", "shared"]): How to generate and apply the noise. Options:
              - "constant": One noise value per channel, fastest
              - "per_pixel": Independent noise value for each pixel and channel, slowest
              - "shared": One noise map shared across all channels, medium speed
              Default: "constant"

          approximation(float): float in [0, 1], default=1.0
              Controls noise generation speed vs quality tradeoff.
              - 1.0: Generate full resolution noise (slowest, highest quality)
              - 0.5: Generate noise at half resolution and upsample
              - 0.25: Generate noise at quarter resolution and upsample
              Only affects 'per_pixel' and 'shared' spatial modes.

          noise_params(dict[str, Any] | None): Parameters for the chosen noise distribution.
              Must match the noise_type; unknown keys are rejected. If None, the defaults
              listed below are used.

              uniform:
                  ranges: list[tuple[float, float]], default [(-0.1, 0.1)]
                      List of (min, max) ranges for each channel.
                      Each range must be in [-1, 1].
                      If only one range is provided, it will be used for all channels.

                      [(-0.2, 0.2)]  # Same range for all channels
                      [(-0.2, 0.2), (-0.1, 0.1), (-0.1, 0.1)]  # Different ranges for RGB

              gaussian:
                  mean_range: tuple[float, float], default (0.0, 0.0)
                      Range for sampling mean value, in [-1, 1]
                  std_range: tuple[float, float], default (0.05, 0.15)
                      Range for sampling standard deviation, in [0, 1]

              laplace:
                  mean_range: tuple[float, float], default (0.0, 0.0)
                      Range for sampling location parameter, in [-1, 1]
                  scale_range: tuple[float, float], default (0.05, 0.15)
                      Range for sampling scale parameter, in [0, 1]

              beta:
                  alpha_range: tuple[float, float], default (0.5, 1.5)
                      Value < 1 = U-shaped, Value > 1 = Bell-shaped
                      Range for sampling first shape parameter, in (0, inf)
                  beta_range: tuple[float, float], default (0.5, 1.5)
                      Value < 1 = U-shaped, Value > 1 = Bell-shaped
                      Range for sampling second shape parameter, in (0, inf)
                  scale_range: tuple[float, float], default (0.1, 0.3)
                      Smaller scale for subtler noise
                      Range for sampling output scale, in [0, 1]

          p (float): Probability of applying the transform. Default: 0.5

      Examples:
          >>> # Constant RGB shift with different ranges per channel:
          >>> transform = AdditiveNoise(
          ...     noise_type="uniform",
          ...     spatial_mode="constant",
          ...     noise_params={"ranges": [(-0.2, 0.2), (-0.1, 0.1), (-0.1, 0.1)]}
          ... )
          >>> # Gaussian noise shared across channels:
          >>> transform = AdditiveNoise(
          ...     noise_type="gaussian",
          ...     spatial_mode="shared",
          ...     noise_params={"mean_range": (0.0, 0.0), "std_range": (0.05, 0.15)}
          ... )

      Note:
          Performance considerations:
              - "constant" mode is fastest as it generates only C values (C = number of channels)
              - "shared" mode generates HxW values and reuses them for all channels
              - "per_pixel" mode generates HxWxC values, slowest but most flexible

          Distribution characteristics:
              - uniform: Equal probability within range, good for simple perturbations
              - gaussian: Bell-shaped, symmetric, good for natural noise
              - laplace: Like gaussian but with heavier tails, good for outliers
              - beta: Very flexible shape, can be uniform, bell-shaped, or U-shaped

          Implementation details:
              - All noise is generated in normalized range and scaled by image max value
              - For uint8 images, final noise range is [-255, 255]
              - For float images, final noise range is [-1, 1]

      """

      class InitSchema(BaseTransformInitSchema):
          noise_type: Literal["uniform", "gaussian", "laplace", "beta"]
          spatial_mode: Literal["constant", "per_pixel", "shared"]
          noise_params: dict[str, Any] | None
          approximation: float = Field(ge=0.0, le=1.0)

          @model_validator(mode="after")
          def _validate_noise_params(self) -> Self:
              """Validate ``noise_params`` against the model for ``noise_type``.

              Missing parameters are replaced by the per-distribution defaults; the
              validated result is stored back on ``noise_params`` as a plain dict.
              """
              # Default parameters for each noise type
              default_params = {
                  "uniform": {
                      "ranges": [(-0.1, 0.1)],  # Single range by default
                  },
                  "gaussian": {"mean_range": (0.0, 0.0), "std_range": (0.05, 0.15)},
                  "laplace": {"mean_range": (0.0, 0.0), "scale_range": (0.05, 0.15)},
                  "beta": {
                      "alpha_range": (0.5, 1.5),
                      "beta_range": (0.5, 1.5),
                      "scale_range": (0.1, 0.3),
                  },
              }

              # Use default params if none provided
              params_dict = self.noise_params if self.noise_params is not None else default_params[self.noise_type]

              # Force the tag to the selected noise type (this overwrites any
              # "noise_type" key the caller may have put into noise_params)
              params_dict = {**params_dict, "noise_type": self.noise_type}  # type: ignore[dict-item]

              # Convert dict to appropriate NoiseParams object and validate
              params_class = {
                  "uniform": UniformParams,
                  "gaussian": GaussianParams,
                  "laplace": LaplaceParams,
                  "beta": BetaParams,
              }[self.noise_type]

              # Validate using the appropriate NoiseParams class
              validated_params = params_class(**params_dict)

              # Store the validated parameters as a dict
              self.noise_params = validated_params.model_dump()

              return self

      def __init__(
          self,
          noise_type: Literal["uniform", "gaussian", "laplace", "beta"] = "uniform",
          spatial_mode: Literal["constant", "per_pixel", "shared"] = "constant",
          noise_params: dict[str, Any] | None = None,
          approximation: float = 1.0,
          p: float = 0.5,
      ):
          super().__init__(p=p)
          self.noise_type = noise_type
          self.spatial_mode = spatial_mode
          self.noise_params = noise_params
          self.approximation = approximation

      def apply(
          self,
          img: np.ndarray,
          noise_map: np.ndarray,
          **params: Any,
      ) -> np.ndarray:
          """Apply the AdditiveNoise transform to the input image.

          Args:
              img (np.ndarray): The input image to apply the AdditiveNoise transform to.
              noise_map (np.ndarray): The noise map to apply to the image.
              **params (Any): Additional parameters for the transform.

          Returns:
              np.ndarray: The result of ``fpixel.add_noise`` for the image and noise map.

          """
          return fpixel.add_noise(img, noise_map)

      def get_params_dependent_on_data(
          self,
          params: dict[str, Any],
          data: dict[str, Any],
      ) -> dict[str, Any]:
          """Generate parameters for the AdditiveNoise transform.

          Args:
              params (dict[str, Any]): The parameters of the transform.
              data (dict[str, Any]): The data to apply the transform to.

          Returns:
              dict[str, Any]: A dict with a single ``"noise_map"`` entry produced by
                  ``fpixel.generate_noise``.

          """
          # Use the single image when present, otherwise the first image of the batch
          # as the reference for shape and dtype.
          image = data["image"] if "image" in data else data["images"][0]

          max_value = MAX_VALUES_BY_DTYPE[image.dtype]

          noise_map = fpixel.generate_noise(
              noise_type=self.noise_type,
              spatial_mode=self.spatial_mode,
              shape=image.shape,
              params=self.noise_params,
              max_value=max_value,
              approximation=self.approximation,
              random_generator=self.random_generator,
          )
          return {"noise_map": noise_map}
  4901. class RGBShift(AdditiveNoise):
  4902. """Randomly shift values for each channel of the input RGB image.
  4903. A specialized version of AdditiveNoise that applies constant uniform shifts to RGB channels.
  4904. Each channel (R,G,B) can have its own shift range specified.
  4905. Args:
  4906. r_shift_limit ((int, int) or int): Range for shifting the red channel. Options:
  4907. - If tuple (min, max): Sample shift value from this range
  4908. - If int: Sample shift value from (-r_shift_limit, r_shift_limit)
  4909. - For uint8 images: Values represent absolute shifts in [0, 255]
  4910. - For float images: Values represent relative shifts in [0, 1]
  4911. Default: (-20, 20)
  4912. g_shift_limit ((int, int) or int): Range for shifting the green channel. Options:
  4913. - If tuple (min, max): Sample shift value from this range
  4914. - If int: Sample shift value from (-g_shift_limit, g_shift_limit)
  4915. - For uint8 images: Values represent absolute shifts in [0, 255]
  4916. - For float images: Values represent relative shifts in [0, 1]
  4917. Default: (-20, 20)
  4918. b_shift_limit ((int, int) or int): Range for shifting the blue channel. Options:
  4919. - If tuple (min, max): Sample shift value from this range
  4920. - If int: Sample shift value from (-b_shift_limit, b_shift_limit)
  4921. - For uint8 images: Values represent absolute shifts in [0, 255]
  4922. - For float images: Values represent relative shifts in [0, 1]
  4923. Default: (-20, 20)
  4924. p (float): Probability of applying the transform. Default: 0.5.
  4925. Targets:
  4926. image
  4927. Image types:
  4928. uint8, float32
  4929. Note:
  4930. - Values are shifted independently for each channel
  4931. - For uint8 images:
  4932. * Input ranges like (-20, 20) represent pixel value shifts
  4933. * A shift of 20 means adding 20 to that channel
  4934. * Final values are clipped to [0, 255]
  4935. - For float32 images:
  4936. * Input ranges like (-0.1, 0.1) represent relative shifts
  4937. * A shift of 0.1 means adding 0.1 to that channel
  4938. * Final values are clipped to [0, 1]
  4939. Examples:
  4940. >>> import numpy as np
  4941. >>> import albumentations as A
  4942. # Shift RGB channels of uint8 image
  4943. >>> transform = A.RGBShift(
  4944. ... r_shift_limit=30, # Will sample red shift from [-30, 30]
  4945. ... g_shift_limit=(-20, 20), # Will sample green shift from [-20, 20]
  4946. ... b_shift_limit=(-10, 10), # Will sample blue shift from [-10, 10]
  4947. ... p=1.0
  4948. ... )
  4949. >>> image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  4950. >>> shifted = transform(image=image)["image"]
  4951. # Same effect using AdditiveNoise
  4952. >>> transform = A.AdditiveNoise(
  4953. ... noise_type="uniform",
  4954. ... spatial_mode="constant", # One value per channel
  4955. ... noise_params={
  4956. ... "ranges": [(-30/255, 30/255), (-20/255, 20/255), (-10/255, 10/255)]
  4957. ... },
  4958. ... p=1.0
  4959. ... )
  4960. See Also:
  4961. - AdditiveNoise: More general noise transform with various options:
  4962. * Different noise distributions (uniform, gaussian, laplace, beta)
  4963. * Spatial modes (constant, per-pixel, shared)
  4964. * Approximation for faster computation
  4965. - RandomToneCurve: For non-linear color transformations
  4966. - RandomBrightnessContrast: For combined brightness and contrast adjustments
        - PlanckianJitter: For color temperature adjustments
  4968. - HueSaturationValue: For HSV color space adjustments
  4969. - ColorJitter: For combined brightness, contrast, saturation adjustments
  4970. """
    class InitSchema(BaseTransformInitSchema):
        # Validation schema for the constructor arguments: one shift limit per colour channel.
        # All three fields share the SymmetricRangeType annotation, which is defined outside
        # this excerpt — presumably it expands a scalar x into the pair (-x, x); confirm
        # against its definition.
        r_shift_limit: SymmetricRangeType
        g_shift_limit: SymmetricRangeType
        b_shift_limit: SymmetricRangeType
  4975. def __init__(
  4976. self,
  4977. r_shift_limit: tuple[float, float] | float = (-20, 20),
  4978. g_shift_limit: tuple[float, float] | float = (-20, 20),
  4979. b_shift_limit: tuple[float, float] | float = (-20, 20),
  4980. p: float = 0.5,
  4981. ):
  4982. # Convert RGB shift limits to normalized ranges if needed
  4983. def normalize_range(limit: tuple[float, float]) -> tuple[float, float]:
  4984. # If any value is > 1, assume uint8 range and normalize
  4985. if abs(limit[0]) > 1 or abs(limit[1]) > 1:
  4986. return (limit[0] / 255.0, limit[1] / 255.0)
  4987. return limit
  4988. ranges = [
  4989. normalize_range(cast("tuple[float, float]", r_shift_limit)),
  4990. normalize_range(cast("tuple[float, float]", g_shift_limit)),
  4991. normalize_range(cast("tuple[float, float]", b_shift_limit)),
  4992. ]
  4993. # Initialize with fixed noise type and spatial mode
  4994. super().__init__(
  4995. noise_type="uniform",
  4996. spatial_mode="constant",
  4997. noise_params={"ranges": ranges},
  4998. approximation=1.0,
  4999. p=p,
  5000. )
  5001. # Store original limits for get_transform_init_args
  5002. self.r_shift_limit = cast("tuple[float, float]", r_shift_limit)
  5003. self.g_shift_limit = cast("tuple[float, float]", g_shift_limit)
  5004. self.b_shift_limit = cast("tuple[float, float]", b_shift_limit)
  5005. class SaltAndPepper(ImageOnlyTransform):
  5006. """Apply salt and pepper noise to the input image.
  5007. Salt and pepper noise is a form of impulse noise that randomly sets pixels to either maximum value (salt)
  5008. or minimum value (pepper). The amount and proportion of salt vs pepper noise can be controlled.
  5009. The same noise mask is applied to all channels of the image to preserve color consistency.
  5010. Args:
  5011. amount ((float, float)): Range for total amount of noise (both salt and pepper).
  5012. Values between 0 and 1. For example:
  5013. - 0.05 means 5% of all pixels will be replaced with noise
  5014. - (0.01, 0.06) will sample amount uniformly from 1% to 6%
  5015. Default: (0.01, 0.06)
  5016. salt_vs_pepper ((float, float)): Range for ratio of salt (white) vs pepper (black) noise.
  5017. Values between 0 and 1. For example:
  5018. - 0.5 means equal amounts of salt and pepper
  5019. - 0.7 means 70% of noisy pixels will be salt, 30% pepper
  5020. - (0.4, 0.6) will sample ratio uniformly from 40% to 60%
  5021. Default: (0.4, 0.6)
  5022. p (float): Probability of applying the transform. Default: 0.5.
  5023. Targets:
  5024. image
  5025. Image types:
  5026. uint8, float32
  5027. Note:
  5028. - Salt noise sets pixels to maximum value (255 for uint8, 1.0 for float32)
  5029. - Pepper noise sets pixels to 0
  5030. - The noise mask is generated once and applied to all channels to maintain
  5031. color consistency (i.e., if a pixel is set to salt, all its color channels
  5032. will be set to maximum value)
  5033. - The exact number of affected pixels matches the specified amount as masks
  5034. are generated without overlap
  5035. Mathematical Formulation:
  5036. For an input image I, the output O is:
  5037. O[c,x,y] = max_value, if salt_mask[x,y] = True
  5038. O[c,x,y] = 0, if pepper_mask[x,y] = True
  5039. O[c,x,y] = I[c,x,y], otherwise
  5040. where:
  5041. - c is the channel index
  5042. - salt_mask and pepper_mask are 2D boolean arrays applied to all channels
  5043. - Number of True values in salt_mask = floor(H*W * amount * salt_ratio)
  5044. - Number of True values in pepper_mask = floor(H*W * amount * (1 - salt_ratio))
  5045. - amount ∈ [amount_min, amount_max]
  5046. - salt_ratio ∈ [salt_vs_pepper_min, salt_vs_pepper_max]
  5047. Examples:
  5048. >>> import albumentations as A
  5049. >>> import numpy as np
  5050. # Apply salt and pepper noise with default parameters
  5051. >>> transform = A.SaltAndPepper(p=1.0)
  5052. >>> noisy_image = transform(image=image)["image"]
  5053. # Heavy noise with more salt than pepper
  5054. >>> transform = A.SaltAndPepper(
  5055. ... amount=(0.1, 0.2), # 10-20% of pixels will be noisy
  5056. ... salt_vs_pepper=(0.7, 0.9), # 70-90% of noise will be salt
  5057. ... p=1.0
  5058. ... )
  5059. >>> noisy_image = transform(image=image)["image"]
  5060. References:
  5061. - Digital Image Processing: Rafael C. Gonzalez and Richard E. Woods, 4th Edition,
  5062. Chapter 5: Image Restoration and Reconstruction.
  5063. - Fundamentals of Digital Image Processing: A. K. Jain, Chapter 7: Image Degradation and Restoration.
  5064. - Salt and pepper noise: https://en.wikipedia.org/wiki/Salt-and-pepper_noise
  5065. See Also:
  5066. - GaussNoise: For additive Gaussian noise
  5067. - MultiplicativeNoise: For multiplicative noise
  5068. - ISONoise: For camera sensor noise simulation
  5069. """
  5070. class InitSchema(BaseTransformInitSchema):
  5071. amount: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))]
  5072. salt_vs_pepper: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))]
  5073. def __init__(
  5074. self,
  5075. amount: tuple[float, float] = (0.01, 0.06),
  5076. salt_vs_pepper: tuple[float, float] = (0.4, 0.6),
  5077. p: float = 0.5,
  5078. ):
  5079. super().__init__(p=p)
  5080. self.amount = amount
  5081. self.salt_vs_pepper = salt_vs_pepper
  5082. def get_params_dependent_on_data(
  5083. self,
  5084. params: dict[str, Any],
  5085. data: dict[str, Any],
  5086. ) -> dict[str, Any]:
  5087. """Generate parameters for the SaltAndPepper transform.
  5088. Args:
  5089. params (dict[str, Any]): The parameters of the transform.
  5090. data (dict[str, Any]): The data to apply the transform to.
  5091. """
  5092. image = data["image"] if "image" in data else data["images"][0]
  5093. height, width = image.shape[:2]
  5094. total_amount = self.py_random.uniform(*self.amount)
  5095. salt_ratio = self.py_random.uniform(*self.salt_vs_pepper)
  5096. area = height * width
  5097. num_pixels = int(area * total_amount)
  5098. num_salt = int(num_pixels * salt_ratio)
  5099. # Generate all positions at once
  5100. noise_positions = self.random_generator.choice(area, size=num_pixels, replace=False)
  5101. # Create masks
  5102. salt_mask = np.zeros(area, dtype=bool)
  5103. pepper_mask = np.zeros(area, dtype=bool)
  5104. # Set salt and pepper positions
  5105. salt_mask[noise_positions[:num_salt]] = True
  5106. pepper_mask[noise_positions[num_salt:]] = True
  5107. # Reshape to 2D
  5108. salt_mask = salt_mask.reshape(height, width)
  5109. pepper_mask = pepper_mask.reshape(height, width)
  5110. return {
  5111. "salt_mask": salt_mask,
  5112. "pepper_mask": pepper_mask,
  5113. }
  5114. def apply(
  5115. self,
  5116. img: np.ndarray,
  5117. salt_mask: np.ndarray,
  5118. pepper_mask: np.ndarray,
  5119. **params: Any,
  5120. ) -> np.ndarray:
  5121. """Apply the SaltAndPepper transform to the input image.
  5122. Args:
  5123. img (np.ndarray): The input image to apply the SaltAndPepper transform to.
  5124. salt_mask (np.ndarray): The salt mask to apply to the image.
  5125. pepper_mask (np.ndarray): The pepper mask to apply to the image.
  5126. **params (Any): Additional parameters for the transform.
  5127. """
  5128. return fpixel.apply_salt_and_pepper(img, salt_mask, pepper_mask)
  5129. class PlasmaBrightnessContrast(ImageOnlyTransform):
  5130. """Apply plasma fractal pattern to modify image brightness and contrast.
  5131. Uses Diamond-Square algorithm to generate organic-looking fractal patterns
  5132. that create spatially-varying brightness and contrast adjustments.
  5133. Args:
  5134. brightness_range ((float, float)): Range for brightness adjustment strength.
  5135. Values between -1 and 1:
  5136. - Positive values increase brightness
  5137. - Negative values decrease brightness
  5138. - 0 means no brightness change
  5139. Default: (-0.3, 0.3)
  5140. contrast_range ((float, float)): Range for contrast adjustment strength.
  5141. Values between -1 and 1:
  5142. - Positive values increase contrast
  5143. - Negative values decrease contrast
  5144. - 0 means no contrast change
  5145. Default: (-0.3, 0.3)
  5146. plasma_size (int): Size of the initial plasma pattern grid.
  5147. Larger values create more detailed patterns but are slower to compute.
  5148. The pattern will be resized to match the input image dimensions.
  5149. Default: 256
  5150. roughness (float): Controls how quickly the noise amplitude increases at each iteration.
  5151. Must be greater than 0:
  5152. - Low values (< 1.0): Smoother, more gradual pattern
  5153. - Medium values (~2.0): Natural-looking pattern
  5154. - High values (> 3.0): Very rough, noisy pattern
  5155. Default: 3.0
  5156. p (float): Probability of applying the transform. Default: 0.5.
  5157. Targets:
  5158. image
  5159. Image types:
  5160. uint8, float32
  5161. Note:
  5162. - Works with any number of channels (grayscale, RGB, multispectral)
  5163. - The same plasma pattern is applied to all channels
  5164. - Operations are performed in float32 precision
  5165. - Final values are clipped to valid range [0, max_value]
  5166. Mathematical Formulation:
  5167. 1. Plasma Pattern Generation (Diamond-Square Algorithm):
  5168. Starting with a 3x3 grid of random values in [-1, 1], iteratively:
  5169. a) Diamond Step: For each 2x2 cell, compute center using diamond kernel:
  5170. [[0.25, 0.0, 0.25],
  5171. [0.0, 0.0, 0.0 ],
  5172. [0.25, 0.0, 0.25]]
  5173. b) Square Step: Fill remaining points using square kernel:
  5174. [[0.0, 0.25, 0.0 ],
  5175. [0.25, 0.0, 0.25],
  5176. [0.0, 0.25, 0.0 ]]
  5177. c) Add random noise scaled by roughness^iteration
  5178. d) Normalize final pattern P to [0,1] range using min-max normalization
  5179. 2. Brightness Adjustment:
  5180. For each pixel (x,y):
  5181. O(x,y) = I(x,y) + b·P(x,y)
  5182. where:
  5183. - I is the input image
  5184. - b is the brightness factor
  5185. - P is the normalized plasma pattern
  5186. 3. Contrast Adjustment:
  5187. For each pixel (x,y):
  5188. O(x,y) = I(x,y)·(1 + c·P(x,y)) + μ·(1 - (1 + c·P(x,y)))
  5189. where:
  5190. - I is the input image
  5191. - c is the contrast factor
  5192. - P is the normalized plasma pattern
  5193. - μ is the mean pixel value
  5194. Examples:
  5195. >>> import albumentations as A
  5196. >>> import numpy as np
  5197. # Default parameters
  5198. >>> transform = A.PlasmaBrightnessContrast(p=1.0)
  5199. # Custom adjustments
  5200. >>> transform = A.PlasmaBrightnessContrast(
  5201. ... brightness_range=(-0.5, 0.5),
  5202. ... contrast_range=(-0.3, 0.3),
  5203. ... plasma_size=512, # More detailed pattern
  5204. ... roughness=0.7, # Smoother transitions
  5205. ... p=1.0
  5206. ... )
  5207. References:
  5208. - Fournier, Fussell, and Carpenter, "Computer rendering of stochastic models,": Communications of
  5209. the ACM, 1982. Paper introducing the Diamond-Square algorithm.
  5210. - Diamond-Square algorithm: https://en.wikipedia.org/wiki/Diamond-square_algorithm
  5211. See Also:
  5212. - RandomBrightnessContrast: For uniform brightness/contrast adjustments
  5213. - CLAHE: For contrast limited adaptive histogram equalization
  5214. - FancyPCA: For color-based contrast enhancement
  5215. - HistogramMatching: For reference-based contrast adjustment
  5216. """
  5217. class InitSchema(BaseTransformInitSchema):
  5218. brightness_range: Annotated[
  5219. tuple[float, float],
  5220. AfterValidator(check_range_bounds(-1, 1)),
  5221. ]
  5222. contrast_range: Annotated[
  5223. tuple[float, float],
  5224. AfterValidator(check_range_bounds(-1, 1)),
  5225. ]
  5226. plasma_size: int = Field(ge=1)
  5227. roughness: float = Field(gt=0)
  5228. def __init__(
  5229. self,
  5230. brightness_range: tuple[float, float] = (-0.3, 0.3),
  5231. contrast_range: tuple[float, float] = (-0.3, 0.3),
  5232. plasma_size: int = 256,
  5233. roughness: float = 3.0,
  5234. p: float = 0.5,
  5235. ):
  5236. super().__init__(p=p)
  5237. self.brightness_range = brightness_range
  5238. self.contrast_range = contrast_range
  5239. self.plasma_size = plasma_size
  5240. self.roughness = roughness
  5241. def get_params_dependent_on_data(
  5242. self,
  5243. params: dict[str, Any],
  5244. data: dict[str, Any],
  5245. ) -> dict[str, Any]:
  5246. """Generate parameters for the PlasmaBrightnessContrast transform.
  5247. Args:
  5248. params (dict[str, Any]): The parameters of the transform.
  5249. data (dict[str, Any]): The data to apply the transform to.
  5250. """
  5251. shape = params["shape"]
  5252. # Sample adjustment strengths
  5253. brightness = self.py_random.uniform(*self.brightness_range)
  5254. contrast = self.py_random.uniform(*self.contrast_range)
  5255. # Generate plasma pattern
  5256. plasma = fpixel.generate_plasma_pattern(
  5257. target_shape=shape[:2],
  5258. roughness=self.roughness,
  5259. random_generator=self.random_generator,
  5260. )
  5261. return {
  5262. "brightness_factor": brightness,
  5263. "contrast_factor": contrast,
  5264. "plasma_pattern": plasma,
  5265. }
  5266. def apply(
  5267. self,
  5268. img: np.ndarray,
  5269. brightness_factor: float,
  5270. contrast_factor: float,
  5271. plasma_pattern: np.ndarray,
  5272. **params: Any,
  5273. ) -> np.ndarray:
  5274. """Apply the PlasmaBrightnessContrast transform to the input image.
  5275. Args:
  5276. img (np.ndarray): The input image to apply the PlasmaBrightnessContrast transform to.
  5277. brightness_factor (float): The brightness factor to apply to the image.
  5278. contrast_factor (float): The contrast factor to apply to the image.
  5279. plasma_pattern (np.ndarray): The plasma pattern to apply to the image.
  5280. **params (Any): Additional parameters for the transform.
  5281. """
  5282. return fpixel.apply_plasma_brightness_contrast(
  5283. img,
  5284. brightness_factor,
  5285. contrast_factor,
  5286. plasma_pattern,
  5287. )
  5288. @batch_transform("spatial", keep_depth_dim=False, has_batch_dim=True, has_depth_dim=False)
  5289. def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
  5290. """Apply the PlasmaBrightnessContrast transform to a batch of images.
  5291. Args:
  5292. images (np.ndarray): The input images to apply the PlasmaBrightnessContrast transform to.
  5293. **params (Any): Additional parameters for the transform.
  5294. """
  5295. return self.apply(images, **params)
  5296. @batch_transform("spatial", keep_depth_dim=True, has_batch_dim=False, has_depth_dim=True)
  5297. def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
  5298. """Apply the PlasmaBrightnessContrast transform to a volume.
  5299. Args:
  5300. volume (np.ndarray): The input volume to apply the PlasmaBrightnessContrast transform to.
  5301. **params (Any): Additional parameters for the transform.
  5302. """
  5303. return self.apply(volume, **params)
  5304. @batch_transform("spatial", keep_depth_dim=True, has_batch_dim=True, has_depth_dim=True)
  5305. def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
  5306. """Apply the PlasmaBrightnessContrast transform to a batch of volumes.
  5307. Args:
  5308. volumes (np.ndarray): The input volumes to apply the PlasmaBrightnessContrast transform to.
  5309. **params (Any): Additional parameters for the transform.
  5310. """
  5311. return self.apply(volumes, **params)
  5312. class PlasmaShadow(ImageOnlyTransform):
  5313. """Apply plasma-based shadow effect to the image using Diamond-Square algorithm.
  5314. Creates organic-looking shadows using plasma fractal noise pattern.
  5315. The shadow intensity varies smoothly across the image, creating natural-looking
  5316. darkening effects that can simulate shadows, shading, or lighting variations.
  5317. Args:
  5318. shadow_intensity_range (tuple[float, float]): Range for shadow intensity.
  5319. Values between 0 and 1:
  5320. - 0 means no shadow (original image)
  5321. - 1 means maximum darkening (black)
  5322. - Values between create partial shadows
  5323. Default: (0.3, 0.7)
  5324. roughness (float): Controls how quickly the noise amplitude increases at each iteration.
  5325. Must be greater than 0:
  5326. - Low values (< 1.0): Smoother, more gradual shadows
  5327. - Medium values (~2.0): Natural-looking shadows
  5328. - High values (> 3.0): Very rough, noisy shadows
  5329. Default: 3.0
  5330. p (float): Probability of applying the transform. Default: 0.5.
  5331. Targets:
  5332. image
  5333. Image types:
  5334. uint8, float32
  5335. Note:
  5336. - The transform darkens the image using a plasma pattern
  5337. - Works with any number of channels (grayscale, RGB, multispectral)
  5338. - Shadow pattern is generated using Diamond-Square algorithm with specific kernels
  5339. - The same shadow pattern is applied to all channels
  5340. - Final values are clipped to valid range [0, max_value]
  5341. Mathematical Formulation:
  5342. 1. Plasma Pattern Generation (Diamond-Square Algorithm):
  5343. Starting with a 3x3 grid of random values in [-1, 1], iteratively:
  5344. a) Diamond Step: For each 2x2 cell, compute center using diamond kernel:
  5345. [[0.25, 0.0, 0.25],
  5346. [0.0, 0.0, 0.0 ],
  5347. [0.25, 0.0, 0.25]]
  5348. b) Square Step: Fill remaining points using square kernel:
  5349. [[0.0, 0.25, 0.0 ],
  5350. [0.25, 0.0, 0.25],
  5351. [0.0, 0.25, 0.0 ]]
  5352. c) Add random noise scaled by roughness^iteration
  5353. d) Normalize final pattern P to [0,1] range using min-max normalization
  5354. 2. Shadow Application:
  5355. For each pixel (x,y):
  5356. O(x,y) = I(x,y) * (1 - i*P(x,y))
  5357. where:
  5358. - I is the input image
  5359. - P is the normalized plasma pattern
  5360. - i is the sampled shadow intensity
  5361. - O is the output image
  5362. Examples:
  5363. >>> import albumentations as A
  5364. >>> import numpy as np
  5365. # Default parameters for natural shadows
  5366. >>> transform = A.PlasmaShadow(p=1.0)
  5367. # Subtle, smooth shadows
  5368. >>> transform = A.PlasmaShadow(
  5369. ... shadow_intensity_range=(0.1, 0.3),
  5370. ... roughness=0.7,
  5371. ... p=1.0
  5372. ... )
  5373. # Dramatic, detailed shadows
  5374. >>> transform = A.PlasmaShadow(
  5375. ... shadow_intensity_range=(0.5, 0.9),
  5376. ... roughness=0.3,
  5377. ... p=1.0
  5378. ... )
  5379. References:
  5380. - Fournier, Fussell, and Carpenter, "Computer rendering of stochastic models,": Communications of
  5381. the ACM, 1982. Paper introducing the Diamond-Square algorithm.
  5382. - Diamond-Square algorithm: https://en.wikipedia.org/wiki/Diamond-square_algorithm
  5383. See Also:
  5384. - PlasmaBrightnessContrast: For brightness/contrast adjustments using plasma patterns
  5385. - RandomShadow: For geometric shadow effects
  5386. - RandomToneCurve: For global lighting adjustments
  5387. - PlasmaBrightnessContrast: For brightness/contrast adjustments using plasma patterns
  5388. """
  5389. class InitSchema(BaseTransformInitSchema):
  5390. shadow_intensity_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))]
  5391. roughness: float = Field(gt=0)
  5392. def __init__(
  5393. self,
  5394. shadow_intensity_range: tuple[float, float] = (0.3, 0.7),
  5395. plasma_size: int = 256,
  5396. roughness: float = 3.0,
  5397. p: float = 0.5,
  5398. ):
  5399. super().__init__(p=p)
  5400. self.shadow_intensity_range = shadow_intensity_range
  5401. self.plasma_size = plasma_size
  5402. self.roughness = roughness
  5403. def get_params_dependent_on_data(
  5404. self,
  5405. params: dict[str, Any],
  5406. data: dict[str, Any],
  5407. ) -> dict[str, Any]:
  5408. """Generate parameters for the PlasmaShadow transform.
  5409. Args:
  5410. params (dict[str, Any]): The parameters of the transform.
  5411. data (dict[str, Any]): The data to apply the transform to.
  5412. """
  5413. shape = params["shape"]
  5414. # Sample shadow intensity
  5415. intensity = self.py_random.uniform(*self.shadow_intensity_range)
  5416. # Generate plasma pattern
  5417. plasma = fpixel.generate_plasma_pattern(
  5418. target_shape=shape[:2],
  5419. roughness=self.roughness,
  5420. random_generator=self.random_generator,
  5421. )
  5422. return {
  5423. "intensity": intensity,
  5424. "plasma_pattern": plasma,
  5425. }
  5426. def apply(
  5427. self,
  5428. img: np.ndarray,
  5429. intensity: float,
  5430. plasma_pattern: np.ndarray,
  5431. **params: Any,
  5432. ) -> np.ndarray:
  5433. """Apply the PlasmaShadow transform to the input image.
  5434. Args:
  5435. img (np.ndarray): The input image to apply the PlasmaShadow transform to.
  5436. intensity (float): The intensity of the shadow to apply to the image.
  5437. plasma_pattern (np.ndarray): The plasma pattern to apply to the image.
  5438. **params (Any): Additional parameters for the transform.
  5439. """
  5440. return fpixel.apply_plasma_shadow(img, intensity, plasma_pattern)
  5441. @batch_transform("spatial", keep_depth_dim=False, has_batch_dim=True, has_depth_dim=False)
  5442. def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
  5443. """Apply the PlasmaShadow transform to a batch of images.
  5444. Args:
  5445. images (np.ndarray): The input images to apply the PlasmaShadow transform to.
  5446. **params (Any): Additional parameters for the transform.
  5447. """
  5448. return self.apply(images, **params)
  5449. @batch_transform("spatial", keep_depth_dim=True, has_batch_dim=False, has_depth_dim=True)
  5450. def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
  5451. """Apply the PlasmaShadow transform to a batch of volume.
  5452. Args:
  5453. volume (np.ndarray): The input volume to apply the PlasmaShadow transform to.
  5454. **params (Any): Additional parameters for the transform.
  5455. """
  5456. return self.apply(volume, **params)
  5457. @batch_transform("spatial", keep_depth_dim=True, has_batch_dim=True, has_depth_dim=True)
  5458. def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
  5459. """Apply the PlasmaShadow transform to a batch of volumes.
  5460. Args:
  5461. volumes (np.ndarray): The input volumes to apply the PlasmaShadow transform to.
  5462. **params (Any): Additional parameters for the transform.
  5463. """
  5464. return self.apply(volumes, **params)
  5465. class Illumination(ImageOnlyTransform):
  5466. """Apply various illumination effects to the image.
  5467. This transform simulates different lighting conditions by applying controlled
  5468. illumination patterns. It can create effects like:
  5469. - Directional lighting (linear mode)
  5470. - Corner shadows/highlights (corner mode)
  5471. - Spotlights or local lighting (gaussian mode)
  5472. These effects can be used to:
  5473. - Simulate natural lighting variations
  5474. - Add dramatic lighting effects
  5475. - Create synthetic shadows or highlights
  5476. - Augment training data with different lighting conditions
  5477. Args:
  5478. mode (Literal["linear", "corner", "gaussian"]): Type of illumination pattern:
  5479. - 'linear': Creates a smooth gradient across the image,
  5480. simulating directional lighting like sunlight
  5481. through a window
  5482. - 'corner': Applies gradient from any corner,
  5483. simulating light source from a corner
  5484. - 'gaussian': Creates a circular spotlight effect,
  5485. simulating local light sources
  5486. Default: 'linear'
  5487. intensity_range (tuple[float, float]): Range for effect strength.
  5488. Values between 0.01 and 0.2:
  5489. - 0.01-0.05: Subtle lighting changes
  5490. - 0.05-0.1: Moderate lighting effects
  5491. - 0.1-0.2: Strong lighting effects
  5492. Default: (0.01, 0.2)
  5493. effect_type (str): Type of lighting change:
  5494. - 'brighten': Only adds light (like a spotlight)
  5495. - 'darken': Only removes light (like a shadow)
  5496. - 'both': Randomly chooses between brightening and darkening
  5497. Default: 'both'
  5498. angle_range (tuple[float, float]): Range for gradient angle in degrees.
  5499. Controls direction of linear gradient:
  5500. - 0°: Left to right
  5501. - 90°: Top to bottom
  5502. - 180°: Right to left
  5503. - 270°: Bottom to top
  5504. Only used for 'linear' mode.
  5505. Default: (0, 360)
  5506. center_range (tuple[float, float]): Range for spotlight position.
  5507. Values between 0 and 1 representing relative position:
  5508. - (0, 0): Top-left corner
  5509. - (1, 1): Bottom-right corner
  5510. - (0.5, 0.5): Center of image
  5511. Only used for 'gaussian' mode.
  5512. Default: (0.1, 0.9)
  5513. sigma_range (tuple[float, float]): Range for spotlight size.
  5514. Values between 0.2 and 1.0:
  5515. - 0.2: Small, focused spotlight
  5516. - 0.5: Medium-sized light area
  5517. - 1.0: Broad, soft lighting
  5518. Only used for 'gaussian' mode.
  5519. Default: (0.2, 1.0)
  5520. p (float): Probability of applying the transform. Default: 0.5
  5521. Targets:
  5522. image
  5523. Image types:
  5524. uint8, float32
  5525. Examples:
  5526. >>> import albumentations as A
  5527. >>> # Simulate sunlight through window
  5528. >>> transform = A.Illumination(
  5529. ... mode='linear',
  5530. ... intensity_range=(0.05, 0.1),
  5531. ... effect_type='brighten',
  5532. ... angle_range=(30, 60)
  5533. ... )
  5534. >>>
  5535. >>> # Create dramatic corner shadow
  5536. >>> transform = A.Illumination(
  5537. ... mode='corner',
  5538. ... intensity_range=(0.1, 0.2),
  5539. ... effect_type='darken'
  5540. ... )
  5541. >>>
  5542. >>> # Add multiple spotlights
  5543. >>> transform1 = A.Illumination(
  5544. ... mode='gaussian',
  5545. ... intensity_range=(0.05, 0.15),
  5546. ... effect_type='brighten',
  5547. ... center_range=(0.2, 0.4),
  5548. ... sigma_range=(0.2, 0.3)
  5549. ... )
  5550. >>> transform2 = A.Illumination(
  5551. ... mode='gaussian',
  5552. ... intensity_range=(0.05, 0.15),
  5553. ... effect_type='darken',
  5554. ... center_range=(0.6, 0.8),
  5555. ... sigma_range=(0.3, 0.5)
  5556. ... )
  5557. >>> transforms = A.Compose([transform1, transform2])
  5558. References:
  5559. - Lighting in Computer Vision:
  5560. https://en.wikipedia.org/wiki/Lighting_in_computer_vision
  5561. - Image-based lighting:
  5562. https://en.wikipedia.org/wiki/Image-based_lighting
  5563. - Similar implementation in Kornia:
  5564. https://kornia.readthedocs.io/en/latest/augmentation.html#randomlinearillumination
  5565. - Research on lighting augmentation:
  5566. "Learning Deep Representations of Fine-grained Visual Descriptions"
  5567. https://arxiv.org/abs/1605.05395
  5568. - Photography lighting patterns:
  5569. https://en.wikipedia.org/wiki/Lighting_pattern
  5570. Note:
  5571. - The transform preserves image range and dtype
  5572. - Effects are applied multiplicatively to preserve texture
  5573. - Can be combined with other transforms for complex lighting scenarios
  5574. - Useful for training models to be robust to lighting variations
  5575. """
  5576. class InitSchema(BaseTransformInitSchema):
  5577. mode: Literal["linear", "corner", "gaussian"]
  5578. intensity_range: Annotated[
  5579. tuple[float, float],
  5580. AfterValidator(check_range_bounds(0.01, 0.2)),
  5581. ]
  5582. effect_type: Literal["brighten", "darken", "both"]
  5583. angle_range: Annotated[
  5584. tuple[float, float],
  5585. AfterValidator(check_range_bounds(0, 360)),
  5586. ]
  5587. center_range: Annotated[
  5588. tuple[float, float],
  5589. AfterValidator(check_range_bounds(0, 1)),
  5590. ]
  5591. sigma_range: Annotated[
  5592. tuple[float, float],
  5593. AfterValidator(check_range_bounds(0.2, 1.0)),
  5594. ]
  5595. def __init__(
  5596. self,
  5597. mode: Literal["linear", "corner", "gaussian"] = "linear",
  5598. intensity_range: tuple[float, float] = (0.01, 0.2),
  5599. effect_type: Literal["brighten", "darken", "both"] = "both",
  5600. angle_range: tuple[float, float] = (0, 360),
  5601. center_range: tuple[float, float] = (0.1, 0.9),
  5602. sigma_range: tuple[float, float] = (0.2, 1.0),
  5603. p: float = 0.5,
  5604. ):
  5605. super().__init__(p=p)
  5606. self.mode = mode
  5607. self.intensity_range = intensity_range
  5608. self.effect_type = effect_type
  5609. self.angle_range = angle_range
  5610. self.center_range = center_range
  5611. self.sigma_range = sigma_range
  5612. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  5613. """Generate parameters for the Illumination transform.
  5614. Args:
  5615. params (dict[str, Any]): The parameters of the transform.
  5616. data (dict[str, Any]): The data to apply the transform to.
  5617. """
  5618. intensity = self.py_random.uniform(*self.intensity_range)
  5619. # Determine if brightening or darkening
  5620. sign = 1 # brighten
  5621. if self.effect_type == "both":
  5622. sign = 1 if self.py_random.random() > 0.5 else -1
  5623. elif self.effect_type == "darken":
  5624. sign = -1
  5625. intensity *= sign
  5626. if self.mode == "linear":
  5627. angle = self.py_random.uniform(*self.angle_range)
  5628. return {
  5629. "intensity": intensity,
  5630. "angle": angle,
  5631. }
  5632. if self.mode == "corner":
  5633. corner = self.py_random.randint(0, 3) # Choose random corner
  5634. return {
  5635. "intensity": intensity,
  5636. "corner": corner,
  5637. }
  5638. x = self.py_random.uniform(*self.center_range)
  5639. y = self.py_random.uniform(*self.center_range)
  5640. sigma = self.py_random.uniform(*self.sigma_range)
  5641. return {
  5642. "intensity": intensity,
  5643. "center": (x, y),
  5644. "sigma": sigma,
  5645. }
  5646. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  5647. """Apply the Illumination transform to the input image.
  5648. Args:
  5649. img (np.ndarray): The input image to apply the Illumination transform to.
  5650. **params (Any): Additional parameters for the transform.
  5651. """
  5652. if self.mode == "linear":
  5653. return fpixel.apply_linear_illumination(
  5654. img,
  5655. intensity=params["intensity"],
  5656. angle=params["angle"],
  5657. )
  5658. if self.mode == "corner":
  5659. return fpixel.apply_corner_illumination(
  5660. img,
  5661. intensity=params["intensity"],
  5662. corner=params["corner"],
  5663. )
  5664. return fpixel.apply_gaussian_illumination(
  5665. img,
  5666. intensity=params["intensity"],
  5667. center=params["center"],
  5668. sigma=params["sigma"],
  5669. )
  5670. class AutoContrast(ImageOnlyTransform):
  5671. """Automatically adjust image contrast by stretching the intensity range.
  5672. This transform provides two methods for contrast enhancement:
  5673. 1. CDF method (default): Uses cumulative distribution function for more gradual adjustment
  5674. 2. PIL method: Uses linear scaling like PIL.ImageOps.autocontrast
  5675. The transform can optionally exclude extreme values from both ends of the
  5676. intensity range and preserve specific intensity values (e.g., alpha channel).
  5677. Args:
  5678. cutoff (float): Percentage of pixels to exclude from both ends of the histogram.
  5679. Range: [0, 100]. Default: 0 (use full intensity range)
  5680. - 0 means use the minimum and maximum intensity values found
  5681. - 20 means exclude darkest and brightest 20% of pixels
  5682. ignore (int, optional): Intensity value to preserve (e.g., alpha channel).
  5683. Range: [0, 255]. Default: None
  5684. - If specified, this intensity value will not be modified
  5685. - Useful for images with alpha channel or special marker values
  5686. method (Literal["cdf", "pil"]): Algorithm to use for contrast enhancement.
  5687. Default: "cdf"
  5688. - "cdf": Uses cumulative distribution for smoother adjustment
  5689. - "pil": Uses linear scaling like PIL.ImageOps.autocontrast
  5690. p (float): Probability of applying the transform. Default: 0.5
  5691. Targets:
  5692. image
  5693. Image types:
  5694. uint8, float32
  5695. Note:
  5696. - The transform processes each color channel independently
  5697. - For grayscale images, only one channel is processed
  5698. - The output maintains the same dtype as input
  5699. - Empty or single-color channels remain unchanged
  5700. Examples:
  5701. >>> import albumentations as A
  5702. >>> # Basic usage
  5703. >>> transform = A.AutoContrast(p=1.0)
  5704. >>>
  5705. >>> # Exclude extreme values
  5706. >>> transform = A.AutoContrast(cutoff=20, p=1.0)
  5707. >>>
  5708. >>> # Preserve alpha channel
  5709. >>> transform = A.AutoContrast(ignore=255, p=1.0)
  5710. >>>
  5711. >>> # Use PIL-like contrast enhancement
  5712. >>> transform = A.AutoContrast(method="pil", p=1.0)
  5713. """
  5714. class InitSchema(BaseTransformInitSchema):
  5715. cutoff: float = Field(ge=0, le=100)
  5716. ignore: int | None = Field(ge=0, le=255)
  5717. method: Literal["cdf", "pil"]
  5718. def __init__(
  5719. self,
  5720. cutoff: float = 0,
  5721. ignore: int | None = None,
  5722. method: Literal["cdf", "pil"] = "cdf",
  5723. p: float = 0.5,
  5724. ):
  5725. super().__init__(p=p)
  5726. self.cutoff = cutoff
  5727. self.ignore = ignore
  5728. self.method = method
  5729. def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
  5730. """Apply the AutoContrast transform to the input image.
  5731. Args:
  5732. img (np.ndarray): The input image to apply the AutoContrast transform to.
  5733. **params (Any): Additional parameters for the transform.
  5734. """
  5735. return fpixel.auto_contrast(img, self.cutoff, self.ignore, self.method)
  5736. @batch_transform("channel", has_batch_dim=True, has_depth_dim=False)
  5737. def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
  5738. """Apply the AutoContrast transform to a batch of images.
  5739. Args:
  5740. images (np.ndarray): The input images to apply the AutoContrast transform to.
  5741. **params (Any): Additional parameters for the transform.
  5742. """
  5743. return self.apply(images, **params)
  5744. @batch_transform("channel", has_batch_dim=False, has_depth_dim=True)
  5745. def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
  5746. """Apply the AutoContrast transform to a batch of volumes.
  5747. Args:
  5748. volume (np.ndarray): The input volume to apply the AutoContrast transform to.
  5749. **params (Any): Additional parameters for the transform.
  5750. """
  5751. return self.apply(volume, **params)
  5752. @batch_transform("channel", has_batch_dim=True, has_depth_dim=True)
  5753. def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
  5754. """Apply the AutoContrast transform to a batch of volumes.
  5755. Args:
  5756. volumes (np.ndarray): The input volumes to apply the AutoContrast transform to.
  5757. **params (Any): Additional parameters for the transform.
  5758. """
  5759. return self.apply(volumes, **params)
  5760. class HEStain(ImageOnlyTransform):
  5761. """Applies H&E (Hematoxylin and Eosin) stain augmentation to histopathology images.
  5762. This transform simulates different H&E staining conditions using either:
  5763. 1. Predefined stain matrices (8 standard references)
  5764. 2. Vahadane method for stain extraction
  5765. 3. Macenko method for stain extraction
  5766. 4. Custom stain matrices
  5767. Args:
  5768. method(Literal["preset", "random_preset", "vahadane", "macenko"]): Method to use for stain augmentation:
  5769. - "preset": Use predefined stain matrices
  5770. - "random_preset": Randomly select a preset matrix each time
  5771. - "vahadane": Extract using Vahadane method
  5772. - "macenko": Extract using Macenko method
  5773. Default: "preset"
  5774. preset(str | None): Preset stain matrix to use when method="preset":
  5775. - "ruifrok": Standard reference from Ruifrok & Johnston
  5776. - "macenko": Reference from Macenko's method
  5777. - "standard": Typical bright-field microscopy
  5778. - "high_contrast": Enhanced contrast
  5779. - "h_heavy": Hematoxylin dominant
  5780. - "e_heavy": Eosin dominant
  5781. - "dark": Darker staining
  5782. - "light": Lighter staining
  5783. Default: "standard"
  5784. intensity_scale_range(tuple[float, float]): Range for multiplicative stain intensity variation.
  5785. Values are multipliers between 0.5 and 1.5. For example:
  5786. - (0.7, 1.3) means stain intensities will vary from 70% to 130%
  5787. - (0.9, 1.1) gives subtle variations
  5788. - (0.5, 1.5) gives dramatic variations
  5789. Default: (0.7, 1.3)
  5790. intensity_shift_range(tuple[float, float]): Range for additive stain intensity variation.
  5791. Values between -0.3 and 0.3. For example:
  5792. - (-0.2, 0.2) means intensities will be shifted by -20% to +20%
  5793. - (-0.1, 0.1) gives subtle shifts
  5794. - (-0.3, 0.3) gives dramatic shifts
  5795. Default: (-0.2, 0.2)
  5796. augment_background(bool): Whether to apply augmentation to background regions.
  5797. Default: False
  5798. Targets:
  5799. image
  5800. Number of channels:
  5801. 3
  5802. Image types:
  5803. uint8, float32
  5804. References:
  5805. - A. C. Ruifrok and D. A. Johnston, "Quantification of histochemical": Analytical and quantitative
  5806. cytology and histology, 2001.
  5807. - M. Macenko et al., "A method for normalizing histology slides for: 2009 IEEE International Symposium on
  5808. quantitative analysis," 2009 IEEE International Symposium on Biomedical Imaging, 2009.
  5809. Examples:
  5810. >>> import numpy as np
  5811. >>> import albumentations as A
  5812. >>> import cv2
  5813. >>>
  5814. >>> # Create a sample H&E stained histopathology image
  5815. >>> # For real use cases, load an actual H&E stained image
  5816. >>> image = np.zeros((300, 300, 3), dtype=np.uint8)
  5817. >>> # Simulate tissue regions with different staining patterns
  5818. >>> image[50:150, 50:150] = np.array([120, 140, 180], dtype=np.uint8) # Hematoxylin-rich region
  5819. >>> image[150:250, 150:250] = np.array([140, 160, 120], dtype=np.uint8) # Eosin-rich region
  5820. >>>
  5821. >>> # Example 1: Using a specific preset stain matrix
  5822. >>> transform = A.HEStain(
  5823. ... method="preset",
  5824. ... preset="standard",
  5825. ... intensity_scale_range=(0.8, 1.2),
  5826. ... intensity_shift_range=(-0.1, 0.1),
  5827. ... augment_background=False,
  5828. ... p=1.0
  5829. ... )
  5830. >>> result = transform(image=image)
  5831. >>> transformed_image = result['image']
  5832. >>>
  5833. >>> # Example 2: Using random preset selection
  5834. >>> transform = A.HEStain(
  5835. ... method="random_preset",
  5836. ... intensity_scale_range=(0.7, 1.3),
  5837. ... intensity_shift_range=(-0.15, 0.15),
  5838. ... p=1.0
  5839. ... )
  5840. >>> result = transform(image=image)
  5841. >>> transformed_image = result['image']
  5842. >>>
  5843. >>> # Example 3: Using Vahadane method (requires H&E stained input)
  5844. >>> transform = A.HEStain(
  5845. ... method="vahadane",
  5846. ... intensity_scale_range=(0.7, 1.3),
  5847. ... p=1.0
  5848. ... )
  5849. >>> result = transform(image=image)
  5850. >>> transformed_image = result['image']
  5851. >>>
  5852. >>> # Example 4: Using Macenko method (requires H&E stained input)
  5853. >>> transform = A.HEStain(
  5854. ... method="macenko",
  5855. ... intensity_scale_range=(0.7, 1.3),
  5856. ... intensity_shift_range=(-0.2, 0.2),
  5857. ... p=1.0
  5858. ... )
  5859. >>> result = transform(image=image)
  5860. >>> transformed_image = result['image']
  5861. >>>
  5862. >>> # Example 5: Combining with other transforms in a pipeline
  5863. >>> transform = A.Compose([
  5864. ... A.HEStain(method="preset", preset="high_contrast", p=1.0),
  5865. ... A.RandomBrightnessContrast(p=0.5),
  5866. ... ])
  5867. >>> result = transform(image=image)
  5868. >>> transformed_image = result['image']
  5869. """
  5870. class InitSchema(BaseTransformInitSchema):
  5871. method: Literal["preset", "random_preset", "vahadane", "macenko"]
  5872. preset: (
  5873. Literal[
  5874. "ruifrok",
  5875. "macenko",
  5876. "standard",
  5877. "high_contrast",
  5878. "h_heavy",
  5879. "e_heavy",
  5880. "dark",
  5881. "light",
  5882. ]
  5883. | None
  5884. )
  5885. intensity_scale_range: Annotated[
  5886. tuple[float, float],
  5887. AfterValidator(nondecreasing),
  5888. AfterValidator(check_range_bounds(0, None)),
  5889. ]
  5890. intensity_shift_range: Annotated[
  5891. tuple[float, float],
  5892. AfterValidator(nondecreasing),
  5893. AfterValidator(check_range_bounds(-1, 1)),
  5894. ]
  5895. augment_background: bool
  5896. @model_validator(mode="after")
  5897. def _validate_matrix_selection(self) -> Self:
  5898. if self.method == "preset" and self.preset is None:
  5899. self.preset = "standard"
  5900. elif self.method == "random_preset" and self.preset is not None:
  5901. raise ValueError("preset should not be specified when method='random_preset'")
  5902. return self
  5903. def __init__(
  5904. self,
  5905. method: Literal["preset", "random_preset", "vahadane", "macenko"] = "random_preset",
  5906. preset: Literal[
  5907. "ruifrok",
  5908. "macenko",
  5909. "standard",
  5910. "high_contrast",
  5911. "h_heavy",
  5912. "e_heavy",
  5913. "dark",
  5914. "light",
  5915. ]
  5916. | None = None,
  5917. intensity_scale_range: tuple[float, float] = (0.7, 1.3),
  5918. intensity_shift_range: tuple[float, float] = (-0.2, 0.2),
  5919. augment_background: bool = False,
  5920. p: float = 0.5,
  5921. ):
  5922. super().__init__(p=p)
  5923. self.method = method
  5924. self.preset = preset
  5925. self.intensity_scale_range = intensity_scale_range
  5926. self.intensity_shift_range = intensity_shift_range
  5927. self.augment_background = augment_background
  5928. self.stain_normalizer = None
  5929. # Initialize stain extractor here if needed
  5930. if method in ["vahadane", "macenko"]:
  5931. self.stain_extractor = fpixel.get_normalizer(
  5932. cast("Literal['vahadane', 'macenko']", method),
  5933. )
  5934. self.preset_names = [
  5935. "ruifrok",
  5936. "macenko",
  5937. "standard",
  5938. "high_contrast",
  5939. "h_heavy",
  5940. "e_heavy",
  5941. "dark",
  5942. "light",
  5943. ]
  5944. def _get_stain_matrix(self, img: np.ndarray) -> np.ndarray:
  5945. """Get stain matrix based on selected method."""
  5946. if self.method == "preset" and self.preset is not None:
  5947. return fpixel.STAIN_MATRICES[self.preset]
  5948. if self.method == "random_preset":
  5949. random_preset = self.py_random.choice(self.preset_names)
  5950. return fpixel.STAIN_MATRICES[random_preset]
  5951. # vahadane or macenko
  5952. self.stain_extractor.fit(img)
  5953. return self.stain_extractor.stain_matrix_target
  5954. def apply(
  5955. self,
  5956. img: np.ndarray,
  5957. stain_matrix: np.ndarray,
  5958. scale_factors: np.ndarray,
  5959. shift_values: np.ndarray,
  5960. **params: Any,
  5961. ) -> np.ndarray:
  5962. """Apply the HEStain transform to the input image.
  5963. Args:
  5964. img (np.ndarray): The input image to apply the HEStain transform to.
  5965. stain_matrix (np.ndarray): The stain matrix to use for the transform.
  5966. scale_factors (np.ndarray): The scale factors to use for the transform.
  5967. shift_values (np.ndarray): The shift values to use for the transform.
  5968. **params (Any): Additional parameters for the transform.
  5969. """
  5970. non_rgb_error(img)
  5971. return fpixel.apply_he_stain_augmentation(
  5972. img=img,
  5973. stain_matrix=stain_matrix,
  5974. scale_factors=scale_factors,
  5975. shift_values=shift_values,
  5976. augment_background=self.augment_background,
  5977. )
  5978. @batch_transform("channel", has_batch_dim=True, has_depth_dim=False)
  5979. def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
  5980. """Apply the HEStain transform to a batch of images.
  5981. Args:
  5982. images (np.ndarray): The input images to apply the HEStain transform to.
  5983. **params (Any): Additional parameters for the transform.
  5984. """
  5985. return self.apply(images, **params)
  5986. @batch_transform("channel", has_batch_dim=False, has_depth_dim=True)
  5987. def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
  5988. """Apply the HEStain transform to a batch of volumes.
  5989. Args:
  5990. volume (np.ndarray): The input volumes to apply the HEStain transform to.
  5991. **params (Any): Additional parameters for the transform.
  5992. """
  5993. return self.apply(volume, **params)
  5994. @batch_transform("channel", has_batch_dim=True, has_depth_dim=True)
  5995. def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
  5996. """Apply the HEStain transform to a batch of volumes.
  5997. Args:
  5998. volumes (np.ndarray): The input volumes to apply the HEStain transform to.
  5999. **params (Any): Additional parameters for the transform.
  6000. """
  6001. return self.apply(volumes, **params)
  6002. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  6003. """Generate parameters for the HEStain transform.
  6004. Args:
  6005. params (dict[str, Any]): The parameters of the transform.
  6006. data (dict[str, Any]): The data to apply the transform to.
  6007. """
  6008. # Get stain matrix
  6009. image = data["image"] if "image" in data else data["images"][0]
  6010. stain_matrix = self._get_stain_matrix(image)
  6011. # Generate random scaling and shift parameters for both H&E channels
  6012. scale_factors = np.array(
  6013. [
  6014. self.py_random.uniform(*self.intensity_scale_range),
  6015. self.py_random.uniform(*self.intensity_scale_range),
  6016. ],
  6017. )
  6018. shift_values = np.array(
  6019. [
  6020. self.py_random.uniform(*self.intensity_shift_range),
  6021. self.py_random.uniform(*self.intensity_shift_range),
  6022. ],
  6023. )
  6024. return {
  6025. "stain_matrix": stain_matrix,
  6026. "scale_factors": scale_factors,
  6027. "shift_values": shift_values,
  6028. }