| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
11342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750137511375213753137541375513756137571375813759137601376113762137631376413765137661376713768137691377013771137721377313774137751377613777137781377913780137811378213783137841378513786137871378813789137901379113792137931379413795137961379713798137991380013801138021380313804138051380613807138081380913810138111381213813138141381513816138171381813819138201382
11382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422
11422214223142241422514226142271422814229142301423114232142331423414235142361423714238142391424014241142421424314244142451424614247142481424914250142511425214253142541425514256142571425814259142601426114262142631426414265142661426714268142691427014271142721427314274142751427614277142781427914280142811428214283142841428514286142871428814289142901429114292142931429414295142961429714298142991430014301143021430314304143051430614307143081430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462
11462214623146241462514626146271462814629146301463114632146331463414635146361463714638146391464014641146421464314644146451464614647146481464914650146511465214653146541465514656146571465814659146601466114662146631466414665146661466714668146691467014671146721467314674146751467614677146781467914680146811468214683146841468514686146871468814689146901469114692146931469414695146961469714698146991470014701147021470314704147051470614707147081470914710147111471214713147141471514716147171471814719147201472114722147231472414725147261472714728147291473014731147321473314734147351473614737147381473914740147411474214743147441474514746147471474814749147501475114752147531475414755147561475714758147591476014761147621476314764147651476614767147681476914770147711477214773147741477514776147771477814779147801478114782147831478414785147861478714788147891479014791147921479314794147951479614797147981479914800148011480214803148041480514806148071480814809148101481114812148131481414815148161481714818148191482014821148221482314824148251482614827148281482914830148311483214833148341483514836148371483814839148401484114842148431484414845148461484714848148491485014851148521485314854148551485614857148581485914860148611486214863148641486514866148671486814869148701487114872148731487414875148761487714878148791488014881148821488314884148851488614887148881488914890148911489214893148941489514896148971489814899149001490114902149031490414905149061490714908149091491014911149121491314914149151491614917149181491914920149211492214923149241492514926149271492814929149301493114932149331493414935149361493714938149391494014941149421494314944149451494614947149481494914950149511495214953149541495514956149571495814959149601496114962149631496414965149661496714968149691497014971149721497314974149751497614977149781497914980149811498214983149841498514986149871498814989149901499114992149931499414995149961499714998149991500015001150021500315004150051500615007150081500915010150111501215013150141501515016150171501815019150201502
11502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542
11542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582
11582215823158241582515826158271582815829158301583115832158331583415835158361583715838158391584015841158421584315844158451584615847158481584915850158511585215853158541585515856158571585815859158601586115862158631586415865158661586715868158691587015871158721587315874158751587615877158781587915880158811588215883158841588515886158871588815889158901589115892158931589415895158961589715898158991590015901159021590315904159051590615907159081590915910159111591215913159141591515916159171591815919159201592115922159231592415925159261592715928159291593015931159321593315934159351593615937159381593915940159411594215943159441594515946159471594815949159501595115952159531595415955159561595715958159591596015961159621596315964159651596615967159681596915970159711597215973159741597515976159771597815979159801598115982159831598415985159861598715988159891599015991159921599315994159951599615997159981599916000160011600216003160041600516006160071600816009160101601116012160131601416015160161601716018160191602016021160221602316024160251602616027160281602916030160311603216033160341603516036160371603816039160401604116042160431604416045160461604716048160491605016051160521605316054160551605616057160581605916060160611606216063160641606516066160671606816069160701607116072160731607416075160761607716078160791608016081160821608316084160851608616087160881608916090160911609216093160941609516096160971609816099161001610116102161031610416105161061610716108161091611016111161121611316114161151611616117161181611916120161211612216123161241612516126161271612816129161301613116132161331613416135161361613716138161391614016141161421614316144161451614616147161481614916150161511615216153161541615516156161571615816159161601616116162161631616416165161661616716168161691617016171161721617316174161751617616177161781617916180161811618216183161841618516186161871618816189161901619116192161931619416195161961619716198161991620016201162021620316204162051620616207162081620916210162111621216213162141621516216162171621816219162201622
11622216223162241622516226162271622816229162301623116232162331623416235162361623716238162391624016241162421624316244162451624616247162481624916250162511625216253162541625516256162571625816259162601626116262162631626416265162661626716268162691627016271162721627316274162751627616277162781627916280162811628216283162841628516286162871628816289162901629116292162931629416295162961629716298162991630016301163021630316304163051630616307163081630916310163111631216313163141631516316163171631816319163201632116322163231632416325163261632716328163291633016331163321633316334163351633616337163381633916340163411634216343163441634516346163471634816349163501635116352163531635416355163561635716358163591636016361163621636316364163651636616367163681636916370163711637216373163741637516376163771637816379163801638116382163831638416385163861638716388163891639016391163921639316394163951639616397163981639916400164011640216403164041640516406164071640816409164101641116412164131641416415164161641716418164191642016421164221642316424164251642616427164281642916430164311643216433164341643516436164371643816439164401644116442164431644416445164461644716448164491645016451164521645316454164551645616457164581645916460164611646216463164641646516466164671646816469164701647116472164731647416475164761647716478164791648016481164821648316484164851648616487164881648916490164911649216493164941649516496164971649816499165001650116502165031650416505165061650716508165091651016511165121651316514165151651616517165181651916520165211652216523165241652516526165271652816529165301653116532165331653416535165361653716538165391654016541165421654316544165451654616547165481654916550165511655216553165541655516556165571655816559165601656116562165631656416565165661656716568165691657016571165721657316574165751657616577165781657916580165811658216583165841658516586165871658816589165901659116592165931659416595165961659716598165991660016601166021660316604166051660616607166081660916610166111661216613166141661516616166171661816619166201662
11662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702
11702217023170241702517026170271702817029170301703117032170331703417035170361703717038170391704017041170421704317044170451704617047170481704917050170511705217053170541705517056170571705817059170601706117062170631706417065170661706717068170691707017071170721707317074170751707617077170781707917080170811708217083170841708517086170871708817089170901709117092170931709417095170961709717098170991710017101171021710317104171051710617107171081710917110171111711217113171141711517116171171711817119171201712117122171231712417125171261712717128171291713017131171321713317134171351713617137171381713917140171411714217143171441714517146171471714817149171501715117152171531715417155171561715717158171591716017161171621716317164171651716617167171681716917170171711717217173171741717517176171771717817179171801718117182171831718417185171861718717188171891719017191171921719317194171951719617197171981719917200172011720217203172041720517206172071720817209172101721117212172131721417215172161721717218172191722017221172221722317224172251722617227172281722917230172311723217233172341723517236172371723817239172401724117242172431724417245172461724717248172491725017251172521725317254172551725617257172581725917260172611726217263172641726517266172671726817269172701727117272172731727417275172761727717278172791728017281172821728317284172851728617287172881728917290172911729217293172941729517296172971729817299173001730117302173031730417305173061730717308173091731017311173121731317314173151731617317173181731917320173211732217323173241732517326173271732817329173301733117332173331733417335173361733717338173391734017341173421734317344173451734617347173481734917350173511735217353173541735517356173571735817359173601736117362173631736417365173661736717368173691737017371173721737317374173751737617377173781737917380173811738217383173841738517386173871738817389173901739117392173931739417395173961739717398173991740017401174021740317404174051740617407174081740917410174111741217413174141741517416174171741817419174201742
11742217423174241742517426174271742817429174301743117432174331743417435174361743717438174391744017441174421744317444174451744617447174481744917450174511745217453174541745517456174571745817459174601746117462174631746417465174661746717468174691747017471174721747317474174751747617477174781747917480174811748217483174841748517486174871748817489174901749117492174931749417495174961749717498174991750017501175021750317504175051750617507175081750917510175111751217513175141751517516175171751817519175201752117522175231752417525175261752717528175291753017531175321753317534175351753617537175381753917540175411754217543175441754517546175471754817549175501755117552175531755417555175561755717558175591756017561175621756317564175651756617567175681756917570175711757217573175741757517576175771757817579175801758117582175831758417585175861758717588175891759017591175921759317594175951759617597175981759917600176011760217603176041760517606176071760817609176101761117612176131761417615176161761717618176191762017621176221762317624176251762617627176281762917630176311763217633176341763517636176371763817639176401764117642176431764417645176461764717648176491765017651176521765317654176551765617657176581765917660176611766217663176641766517666176671766817669176701767117672176731767417675176761767717678176791768017681176821768317684176851768617687176881768917690176911769217693176941769517696176971769817699177001770117702177031770417705177061770717708177091771017711177121771317714177151771617717177181771917720177211772217723177241772517726177271772817729177301773117732177331773417735177361773717738177391774017741177421774317744177451774617747177481774917750177511775217753177541775517756177571775817759177601776117762177631776417765177661776717768177691777017771177721777317774177751777617777177781777917780177811778217783177841778517786177871778817789177901779117792177931779417795177961779717798177991780017801178021780317804178051780617807178081780917810178111781217813178141781517816178171781817819178201782
11782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822
11822218223182241822518226182271822818229182301823118232182331823418235182361823718238182391824018241182421824318244182451824618247182481824918250182511825218253182541825518256182571825818259182601826118262182631826418265182661826718268182691827018271182721827318274182751827618277182781827918280182811828218283182841828518286182871828818289182901829118292182931829418295182961829718298182991830018301183021830318304183051830618307183081830918310183111831218313183141831518316183171831818319183201832118322183231832418325183261832718328183291833018331183321833318334183351833618337183381833918340183411834218343183441834518346183471834818349183501835118352183531835418355183561835718358183591836018361183621836318364183651836618367183681836918370183711837218373183741837518376183771837818379183801838118382183831838418385183861838718388183891839018391183921839318394183951839618397183981839918400184011840218403184041840518406184071840818409184101841118412184131841418415184161841718418184191842018421184221842318424184251842618427184281842918430184311843218433184341843518436184371843818439184401844118442184431844418445184461844718448184491845018451184521845318454184551845618457184581845918460184611846218463184641846518466184671846818469184701847118472184731847418475184761847718478184791848018481184821848318484184851848618487184881848918490184911849218493184941849518496184971849818499185001850118502185031850418505185061850718508185091851018511185121851318514185151851618517185181851918520185211852218523185241852518526185271852818529185301853118532185331853418535185361853718538185391854018541185421854318544185451854618547185481854918550185511855218553185541855518556185571855818559185601856118562185631856418565185661856718568185691857018571185721857318574185751857618577185781857918580185811858218583185841858518586185871858818589185901859118592185931859418595185961859718598185991860018601186021860318604186051860618607186081860918610186111861218613186141861518616186171861818619186201862
11862218623186241862518626186271862818629186301863118632186331863418635186361863718638186391864018641186421864318644186451864618647186481864918650186511865218653186541865518656186571865818659186601866118662186631866418665186661866718668186691867018671186721867318674186751867618677186781867918680186811868218683186841868518686186871868818689186901869118692186931869418695186961869718698186991870018701187021870318704187051870618707187081870918710187111871218713187141871518716187171871818719187201872118722187231872418725187261872718728187291873018731187321873318734187351873618737187381873918740187411874218743187441874518746187471874818749187501875118752187531875418755187561875718758187591876018761187621876318764187651876618767187681876918770187711877218773187741877518776187771877818779187801878118782187831878418785187861878718788187891879018791187921879318794187951879618797187981879918800188011880218803188041880518806188071880818809188101881118812188131881418815188161881718818188191882018821188221882318824188251882618827188281882918830188311883218833188341883518836188371883818839188401884118842188431884418845188461884718848188491885018851188521885318854188551885618857188581885918860188611886218863188641886518866188671886818869188701887118872188731887418875188761887718878188791888018881188821888318884188851888618887188881888918890188911889218893188941889518896188971889818899189001890118902189031890418905189061890718908189091891018911189121891318914189151891618917189181891918920189211892218923189241892518926189271892818929189301893118932189331893418935189361893718938189391894018941189421894318944189451894618947189481894918950189511895218953189541895518956189571895818959189601896118962189631896418965189661896718968189691897018971189721897318974189751897618977189781897918980189811898218983189841898518986189871898818989189901899118992189931899418995189961899718998189991900019001190021900319004190051900619007190081900919010190111901219013190141901519016190171901819019190201902
11902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942
11942219423194241942519426194271942819429194301943119432194331943419435194361943719438194391944019441194421944319444194451944619447194481944919450194511945219453194541945519456194571945819459194601946119462194631946419465194661946719468194691947019471194721947319474194751947619477194781947919480194811948219483194841948519486194871948819489194901949119492194931949419495194961949719498194991950019501195021950319504195051950619507195081950919510195111951219513195141951519516195171951819519195201952119522195231952419525195261952719528195291953019531195321953319534195351953619537195381953919540195411954219543195441954519546195471954819549195501955119552195531955419555195561955719558195591956019561195621956319564195651956619567195681956919570195711957219573195741957519576195771957819579195801958119582195831958419585195861958719588195891959019591195921959319594195951959619597195981959919600196011960219603196041960519606196071960819609196101961119612196131961419615196161961719618196191962019621196221962319624196251962619627196281962919630196311963219633196341963519636196371963819639196401964119642196431964419645196461964719648196491965019651196521965319654196551965619657196581965919660196611966219663196641966519666196671966819669196701967119672196731967419675196761967719678196791968019681196821968319684196851968619687196881968919690196911969219693196941969519696196971969819699197001970119702197031970419705197061970719708197091971019711197121971319714197151971619717197181971919720197211972219723197241972519726197271972819729197301973119732197331973419735197361973719738197391974019741197421974319744197451974619747197481974919750197511975219753197541975519756197571975819759197601976119762197631976419765197661976719768197691977019771197721977319774197751977619777197781977919780197811978219783197841978519786197871978819789197901979119792197931979419795197961979719798197991980019801198021980319804198051980619807198081980919810198111981219813198141981519816198171981819819198201982
11982219823198241982519826198271982819829198301983119832198331983419835198361983719838198391984019841198421984319844198451984619847198481984919850198511985219853198541985519856198571985819859198601986119862198631986419865198661986719868198691987019871198721987319874198751987619877198781987919880198811988219883198841988519886198871988819889198901989119892198931989419895198961989719898198991990019901199021990319904199051990619907199081990919910199111991219913199141991519916199171991819919199201992119922199231992419925199261992719928199291993019931199321993319934199351993619937199381993919940199411994219943199441994519946199471994819949199501995119952199531995419955199561995719958199591996019961199621996319964199651996619967199681996919970199711997219973199741997519976199771997819979199801998119982199831998419985199861998719988199891999019991199921999319994199951999619997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022
12022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062
12062220623206242062520626206272062820629206302063120632206332063420635206362063720638206392064020641206422064320644206452064620647206482064920650206512065220653206542065520656206572065820659206602066120662206632066420665206662066720668206692067020671206722067320674206752067620677206782067920680206812068220683206842068520686206872068820689206902069120692206932069420695206962069720698206992070020701207022070320704207052070620707207082070920710207112071220713207142071520716207172071820719207202072120722207232072420725207262072720728207292073020731207322073320734207352073620737207382073920740207412074220743207442074520746207472074820749207502075120752207532075420755207562075720758207592076020761207622076320764207652076620767207682076920770207712077220773207742077520776207772077820779207802078120782207832078420785207862078720788207892079020791207922079320794207952079620797207982079920800208012080220803208042080520806208072080820809208102081120812208132081420815208162081720818208192082020821208222082320824208252082620827208282082920830208312083220833208342083520836208372083820839208402084120842208432084420845208462084720848208492085020851208522085320854208552085620857208582085920860208612086220863208642086520866208672086820869208702087120872208732087420875208762087720878208792088020881208822088320884208852088620887208882088920890208912089220893208942089520896208972089820899209002090120902209032090420905209062090720908209092091020911209122091320914209152091620917209182091920920209212092220923209242092520926209272092820929209302093120932209332093420935209362093720938209392094020941209422094320944209452094620947209482094920950209512095220953209542095520956209572095820959209602096120962209632096420965209662096720968209692097020971209722097320974209752097620977209782097920980209812098220983209842098520986209872098820989209902099120992209932099420995209962099720998209992100021001210022100321004210052100621007210082100921010210112101221013210142101521016210172101821019210202102
12102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308213092131021311213122131321314213152131621317213182131921320213212132221323213242132521326213272132821329213302133121332213332133421335213362133721338213392134021341213422134321344213452134621347213482134921350213512135221353213542135521356213572135821359213602136121362213632136421365213662136721368213692137021371213722137321374213752137621377213782137921380213812138221383213842138521386213872138821389213902139121392213932139421395213962139721398213992140021401214022140321404214052140621407214082140921410214112141221413214142141521416214172141821419214202142
12142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182
12182221823218242182521826218272182821829218302183121832218332183421835218362183721838218392184021841218422184321844218452184621847218482184921850218512185221853218542185521856218572185821859218602186121862218632186421865218662186721868218692187021871218722187321874218752187621877218782187921880218812188221883218842188521886218872188821889218902189121892218932189421895218962189721898218992190021901219022190321904219052190621907219082190921910219112191221913219142191521916219172191821919219202192121922219232192421925219262192721928219292193021931219322193321934219352193621937219382193921940219412194221943219442194521946219472194821949219502195121952219532195421955219562195721958219592196021961219622196321964219652196621967219682196921970219712197221973219742197521976219772197821979219802198121982219832198421985219862198721988219892199021991219922199321994219952199621997219982199922000220012200222003220042200522006220072200822009220102201122012220132201422015220162201722018220192202022021220222202322024220252202622027220282202922030220312203222033220342203522036220372203822039220402204122042220432204422045220462204722048220492205022051220522205322054220552205622057220582205922060220612206222063220642206522066220672206822069220702207122072220732207422075220762207722078220792208022081220822208322084220852208622087220882208922090220912209222093220942209522096220972209822099221002210122102221032210422105221062210722108221092211022111221122211322114221152211622117221182211922120221212212222123221242212522126221272212822129221302213122132221332213422135221362213722138221392214022141221422214322144221452214622147221482214922150221512215222153221542215522156221572215822159221602216122162221632216422165221662216722168221692217022171221722217322174221752217622177221782217922180221812218222183221842218522186221872218822189221902219122192221932219422195221962219722198221992220022201222022220322204222052220622207222082220922210222112221222213222142221522216222172221822219222202222
12222222223222242222522226222272222822229222302223122232222332223422235222362223722238222392224022241222422224322244222452224622247222482224922250222512225222253222542225522256222572225822259222602226122262222632226422265222662226722268222692227022271222722227322274222752227622277222782227922280222812228222283222842228522286222872228822289222902229122292222932229422295222962229722298222992230022301223022230322304223052230622307223082230922310223112231222313223142231522316223172231822319223202232122322223232232422325223262232722328223292233022331223322233322334223352233622337223382233922340223412234222343223442234522346223472234822349223502235122352223532235422355223562235722358223592236022361223622236322364223652236622367223682236922370223712237222373223742237522376223772237822379223802238122382223832238422385223862238722388223892239022391223922239322394223952239622397223982239922400224012240222403224042240522406224072240822409224102241122412224132241422415224162241722418224192242022421224222242322424224252242622427224282242922430224312243222433224342243522436224372243822439224402244122442224432244422445224462244722448224492245022451224522245322454224552245622457224582245922460224612246222463224642246522466224672246822469224702247122472224732247422475224762247722478224792248022481224822248322484224852248622487224882248922490224912249222493224942249522496224972249822499225002250122502225032250422505225062250722508225092251022511225122251322514225152251622517225182251922520225212252222523225242252522526225272252822529225302253122532225332253422535225362253722538225392254022541225422254322544225452254622547225482254922550225512255222553225542255522556225572255822559225602256122562225632256422565225662256722568225692257022571225722257322574225752257622577225782257922580225812258222583225842258522586225872258822589225902259122592225932259422595225962259722598225992260022601226022260322604226052260622607226082260922610226112261222613226142261522616226172261822619226202262
12262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302
12302223023230242302523026230272302823029230302303123032230332303423035230362303723038230392304023041230422304323044230452304623047230482304923050230512305223053230542305523056230572305823059230602306123062230632306423065230662306723068230692307023071230722307323074230752307623077230782307923080230812308223083230842308523086230872308823089230902309123092230932309423095230962309723098230992310023101231022310323104231052310623107231082310923110231112311223113231142311523116231172311823119231202312123122231232312423125231262312723128231292313023131231322313323134231352313623137231382313923140231412314223143231442314523146231472314823149231502315123152231532315423155231562315723158231592316023161231622316323164231652316623167231682316923170231712317223173231742317523176231772317823179231802318123182231832318423185231862318723188231892319023191231922319323194231952319623197231982319923200232012320223203232042320523206232072320823209232102321123212232132321423215232162321723218232192322023221232222322323224232252322623227232282322923230232312323223233232342323523236232372323823239232402324123242232432324423245232462324723248232492325023251232522325323254232552325623257232582325923260232612326223263232642326523266232672326823269232702327123272232732327423275232762327723278232792328023281232822328323284232852328623287232882328923290232912329223293232942329523296232972329823299233002330123302233032330423305233062330723308233092331023311233122331323314233152331623317233182331923320233212332223323233242332523326233272332823329233302333123332233332333423335233362333723338233392334023341233422334323344233452334623347233482334923350233512335223353233542335523356233572335823359233602336123362233632336423365233662336723368233692337023371233722337323374233752337623377233782337923380233812338223383233842338523386233872338823389233902339123392233932339423395233962339723398233992340023401234022340323404234052340623407234082340923410234112341223413234142341523416234172341823419234202342
12342223423234242342523426234272342823429234302343123432234332343423435234362343723438234392344023441234422344323444234452344623447234482344923450234512345223453234542345523456234572345823459234602346123462234632346423465234662346723468234692347023471234722347323474234752347623477234782347923480234812348223483234842348523486234872348823489234902349123492234932349423495234962349723498234992350023501235022350323504235052350623507235082350923510235112351223513235142351523516235172351823519235202352123522235232352423525235262352723528235292353023531235322353323534235352353623537235382353923540235412354223543235442354523546235472354823549235502355123552235532355423555235562355723558235592356023561235622356323564235652356623567235682356923570235712357223573235742357523576235772357823579235802358123582235832358423585235862358723588235892359023591235922359323594235952359623597235982359923600236012360223603236042360523606236072360823609236102361123612236132361423615236162361723618236192362023621236222362323624236252362623627236282362923630236312363223633236342363523636236372363823639236402364123642236432364423645236462364723648236492365023651236522365323654236552365623657236582365923660236612366223663236642366523666236672366823669236702367123672236732367423675236762367723678236792368023681236822368323684236852368623687236882368923690236912369223693236942369523696236972369823699237002370123702237032370423705237062370723708237092371023711237122371323714237152371623717237182371923720237212372223723237242372523726237272372823729237302373123732237332373423735237362373723738237392374023741237422374323744237452374623747237482374923750237512375223753237542375523756237572375823759237602376123762237632376423765237662376723768237692377023771237722377323774237752377623777237782377923780237812378223783237842378523786237872378823789237902379123792237932379423795237962379723798237992380023801238022380323804238052380623807238082380923810238112381223813238142381523816238172381823819238202382
12382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422
12422224223242242422524226242272422824229242302423124232242332423424235242362423724238242392424024241242422424324244242452424624247242482424924250242512425224253242542425524256242572425824259242602426124262242632426424265242662426724268242692427024271242722427324274242752427624277242782427924280242812428224283242842428524286242872428824289242902429124292242932429424295242962429724298242992430024301243022430324304243052430624307243082430924310243112431224313243142431524316243172431824319243202432124322243232432424325243262432724328243292433024331243322433324334243352433624337243382433924340243412434224343243442434524346243472434824349243502435124352243532435424355243562435724358243592436024361243622436324364243652436624367243682436924370243712437224373243742437524376243772437824379243802438124382243832438424385243862438724388243892439024391243922439324394243952439624397243982439924400244012440224403244042440524406244072440824409244102441124412244132441424415244162441724418244192442024421244222442324424244252442624427244282442924430244312443224433244342443524436244372443824439244402444124442244432444424445244462444724448244492445024451244522445324454244552445624457244582445924460244612446224463244642446524466244672446824469244702447124472244732447424475244762447724478244792448024481244822448324484244852448624487244882448924490244912449224493244942449524496244972449824499245002450124502245032450424505245062450724508245092451024511245122451324514245152451624517245182451924520245212452224523245242452524526245272452824529245302453124532245332453424535245362453724538245392454024541245422454324544245452454624547245482454924550245512455224553245542455524556245572455824559245602456124562245632456424565245662456724568245692457024571245722457324574245752457624577245782457924580245812458224583245842458524586245872458824589245902459124592245932459424595245962459724598245992460024601246022460324604246052460624607246082460924610246112461224613246142461524616246172461824619246202462
12462224623246242462524626246272462824629246302463124632246332463424635246362463724638246392464024641246422464324644246452464624647246482464924650246512465224653246542465524656246572465824659246602466124662246632466424665246662466724668246692467024671246722467324674246752467624677246782467924680246812468224683246842468524686246872468824689246902469124692246932469424695246962469724698246992470024701247022470324704247052470624707247082470924710247112471224713247142471524716247172471824719247202472124722247232472424725247262472724728247292473024731247322473324734247352473624737247382473924740247412474224743247442474524746247472474824749247502475124752247532475424755247562475724758247592476024761247622476324764247652476624767247682476924770247712477224773247742477524776247772477824779247802478124782247832478424785247862478724788247892479024791247922479324794247952479624797247982479924800248012480224803248042480524806248072480824809248102481124812248132481424815248162481724818248192482024821248222482324824248252482624827248282482924830248312483224833248342483524836248372483824839248402484124842248432484424845248462484724848248492485024851248522485324854248552485624857248582485924860248612486224863248642486524866248672486824869248702487124872248732487424875248762487724878248792488024881248822488324884248852488624887248882488924890248912489224893248942489524896248972489824899249002490124902249032490424905249062490724908249092491024911249122491324914249152491624917249182491924920249212492224923249242492524926249272492824929249302493124932249332493424935249362493724938249392494024941249422494324944249452494624947249482494924950249512495224953249542495524956249572495824959249602496124962249632496424965249662496724968249692497024971249722497324974249752497624977249782497924980249812498224983249842498524986249872498824989249902499124992249932499424995249962499724998249992500025001250022500325004250052500625007250082500925010250112501225013250142501525016250172501825019250202502
12502225023250242502525026250272502825029250302503125032250332503425035250362503725038250392504025041250422504325044250452504625047250482504925050250512505225053250542505525056250572505825059250602506125062250632506425065250662506725068250692507025071250722507325074250752507625077250782507925080250812508225083250842508525086250872508825089250902509125092250932509425095250962509725098250992510025101251022510325104251052510625107251082510925110251112511225113251142511525116251172511825119251202512125122251232512425125251262512725128251292513025131251322513325134251352513625137251382513925140251412514225143251442514525146251472514825149251502515125152251532515425155251562515725158251592516025161251622516325164251652516625167251682516925170251712517225173251742517525176251772517825179251802518125182251832518425185251862518725188251892519025191251922519325194251952519625197251982519925200252012520225203252042520525206252072520825209252102521125212252132521425215252162521725218252192522025221252222522325224252252522625227252282522925230252312523225233252342523525236252372523825239252402524125242252432524425245252462524725248252492525025251252522525325254252552525625257252582525925260252612526225263252642526525266252672526825269252702527125272252732527425275252762527725278252792528025281252822528325284252852528625287252882528925290252912529225293252942529525296252972529825299253002530125302253032530425305253062530725308253092531025311253122531325314253152531625317253182531925320253212532225323253242532525326253272532825329253302533125332253332533425335253362533725338253392534025341253422534325344253452534625347253482534925350253512535225353253542535525356253572535825359253602536125362253632536425365253662536725368253692537025371253722537325374253752537625377253782537925380253812538225383253842538525386253872538825389253902539125392253932539425395253962539725398253992540025401254022540325404254052540625407254082540925410254112541225413254142541525416254172541825419254202542
12542225423254242542525426254272542825429254302543125432254332543425435254362543725438254392544025441254422544325444254452544625447254482544925450254512545225453254542545525456254572545825459254602546125462254632546425465254662546725468254692547025471254722547325474254752547625477254782547925480254812548225483254842548525486254872548825489254902549125492254932549425495254962549725498254992550025501255022550325504255052550625507255082550925510255112551225513255142551525516255172551825519255202552125522255232552425525255262552725528255292553025531255322553325534255352553625537255382553925540255412554225543255442554525546255472554825549255502555125552255532555425555255562555725558255592556025561255622556325564255652556625567255682556925570255712557225573255742557525576255772557825579255802558125582255832558425585255862558725588255892559025591255922559325594255952559625597255982559925600256012560225603256042560525606256072560825609256102561125612256132561425615256162561725618256192562025621256222562325624256252562625627256282562925630256312563225633256342563525636256372563825639256402564125642256432564425645256462564725648256492565025651256522565325654256552565625657256582565925660256612566225663256642566525666256672566825669256702567125672256732567425675256762567725678256792568025681256822568325684256852568625687256882568925690256912569225693256942569525696256972569825699257002570125702257032570425705257062570725708257092571025711257122571325714257152571625717257182571925720257212572225723257242572525726257272572825729257302573125732257332573425735257362573725738257392574025741257422574325744257452574625747257482574925750257512575225753257542575525756257572575825759257602576125762257632576425765257662576725768257692577025771257722577325774257752577625777257782577925780257812578225783257842578525786257872578825789257902579125792257932579425795257962579725798257992580025801258022580325804258052580625807258082580925810258112581225813258142581525816258172581825819258202582
12582225823258242582525826258272582825829258302583125832258332583425835258362583725838258392584025841258422584325844258452584625847258482584925850258512585225853258542585525856258572585825859258602586125862258632586425865258662586725868258692587025871258722587325874258752587625877258782587925880258812588225883258842588525886258872588825889258902589125892258932589425895258962589725898258992590025901259022590325904259052590625907259082590925910259112591225913259142591525916259172591825919259202592125922259232592425925259262592725928259292593025931259322593325934259352593625937259382593925940259412594225943259442594525946259472594825949259502595125952259532595425955259562595725958259592596025961259622596325964259652596625967259682596925970259712597225973259742597525976259772597825979259802598125982259832598425985259862598725988259892599025991259922599325994259952599625997259982599926000260012600226003260042600526006260072600826009260102601126012260132601426015260162601726018260192602026021260222602326024260252602626027260282602926030260312603226033260342603526036260372603826039260402604126042260432604426045260462604726048260492605026051260522605326054260552605626057260582605926060260612606226063260642606526066260672606826069260702607126072260732607426075260762607726078260792608026081260822608326084260852608626087260882608926090260912609226093260942609526096260972609826099261002610126102261032610426105261062610726108261092611026111261122611326114261152611626117261182611926120261212612226123261242612526126261272612826129261302613126132261332613426135261362613726138261392614026141261422614326144261452614626147261482614926150261512615226153261542615526156261572615826159261602616126162261632616426165261662616726168261692617026171261722617326174261752617626177261782617926180261812618226183261842618526186261872618826189261902619126192261932619426195261962619726198261992620026201262022620326204262052620626207262082620926210262112621226213262142621526216262172621826219262202622
12622226223262242622526226262272622826229262302623126232262332623426235262362623726238262392624026241262422624326244262452624626247262482624926250262512625226253262542625526256262572625826259262602626126262262632626426265262662626726268262692627026271262722627326274262752627626277262782627926280 |
- /*
- * Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
- *
- * NOTICE TO LICENSEE:
- *
- * This source code and/or documentation ("Licensed Deliverables") are
- * subject to NVIDIA intellectual property rights under U.S. and
- * international Copyright laws.
- *
- * These Licensed Deliverables contained herein is PROPRIETARY and
- * CONFIDENTIAL to NVIDIA and is being provided under the terms and
- * conditions of a form of NVIDIA software license agreement by and
- * between NVIDIA and Licensee ("License Agreement") or electronically
- * accepted by Licensee. Notwithstanding any terms or conditions to
- * the contrary in the License Agreement, reproduction or disclosure
- * of the Licensed Deliverables to any third party without the express
- * written consent of NVIDIA is prohibited.
- *
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
- * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
- * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
- * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
- * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
- * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
- * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
- * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
- * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
- * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THESE LICENSED DELIVERABLES.
- *
- * U.S. Government End Users. These Licensed Deliverables are a
- * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
- * 1995), consisting of "commercial computer software" and "commercial
- * computer software documentation" as such terms are used in 48
- * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
- * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
- * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
- * U.S. Government End Users acquire the Licensed Deliverables with
- * only those rights set forth herein.
- *
- * Any use of the Licensed Deliverables in individual and commercial
- * software must include, in the user documentation and internal
- * comments to the code, the above Disclaimer and U.S. Government End
- * Users Notice.
- */
- #ifndef __cuda_cuda_h__
- #define __cuda_cuda_h__
- #include <stdlib.h>
- #ifdef _MSC_VER /* Fixed-width unsigned aliases. MSVC branch: built from the compiler's sized-integer keywords; <stdint.h> is not included on this path. */
- typedef unsigned __int32 cuuint32_t; /* unsigned 32-bit alias */
- typedef unsigned __int64 cuuint64_t; /* unsigned 64-bit alias */
- #else /* every other compiler: the same two aliases come from the <stdint.h> types */
- #include <stdint.h>
- typedef uint32_t cuuint32_t;
- typedef uint64_t cuuint64_t;
- #endif /* _MSC_VER */
- #if defined(__CUDA_API_VERSION_INTERNAL) || defined(__DOXYGEN_ONLY__) || defined(CUDA_ENABLE_DEPRECATED) /* __CUDA_DEPRECATED selector: if any of these three macros is defined, the marker expands to nothing */
- #define __CUDA_DEPRECATED
- #elif defined(_MSC_VER) /* MSVC spelling of the deprecation marker */
- #define __CUDA_DEPRECATED __declspec(deprecated)
- #elif defined(__GNUC__) /* GNU-style attribute, chosen whenever __GNUC__ is defined */
- #define __CUDA_DEPRECATED __attribute__((deprecated))
- #else /* no recognized compiler macro: the marker expands to nothing */
- #define __CUDA_DEPRECATED
- #endif /* __CUDA_DEPRECATED selection */
- #if defined(CUDA_FORCE_API_VERSION) /* hard stop: defining this macro halts compilation through the #error on the next line */
- #error "CUDA_FORCE_API_VERSION is no longer supported."
- #endif /* CUDA_FORCE_API_VERSION */
- #if defined(__CUDA_API_VERSION_INTERNAL) || defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) /* either macro selects the suffixed (_ptds / _ptsz) entry-point names */
- #define __CUDA_API_PER_THREAD_DEFAULT_STREAM /* internal flag; it is tested again further down this header */
- #define __CUDA_API_PTDS(api) api ## _ptds /* token-pastes _ptds onto the given name */
- #define __CUDA_API_PTSZ(api) api ## _ptsz /* token-pastes _ptsz onto the given name */
- #else /* neither macro defined: both wrappers hand the name back unchanged */
- #define __CUDA_API_PTDS(api) api
- #define __CUDA_API_PTSZ(api) api
- #endif /* suffix-wrapper selection */
- #define cuDeviceTotalMem cuDeviceTotalMem_v2 /* Alias table: each line redirects the API name on the left to the versioned symbol on the right. */
- #define cuCtxCreate cuCtxCreate_v2
- #define cuCtxCreate_v3 cuCtxCreate_v3 /* expands to itself: nothing is renamed, but the name becomes visible to #ifdef / defined() */
- #define cuCtxCreate_v4 cuCtxCreate_v4 /* same self-mapping as cuCtxCreate_v3 */
- #define cuModuleGetGlobal cuModuleGetGlobal_v2
- #define cuMemGetInfo cuMemGetInfo_v2
- #define cuMemAlloc cuMemAlloc_v2
- #define cuMemAllocPitch cuMemAllocPitch_v2
- #define cuMemFree cuMemFree_v2
- #define cuMemGetAddressRange cuMemGetAddressRange_v2
- #define cuMemAllocHost cuMemAllocHost_v2
- #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
- #define cuMemcpyHtoD __CUDA_API_PTDS(cuMemcpyHtoD_v2) /* Copy/memset aliases: the target name is passed through __CUDA_API_PTDS or __CUDA_API_PTSZ, which either leave it unchanged or append _ptds / _ptsz. */
- #define cuMemcpyDtoH __CUDA_API_PTDS(cuMemcpyDtoH_v2)
- #define cuMemcpyDtoD __CUDA_API_PTDS(cuMemcpyDtoD_v2)
- #define cuMemcpyDtoA __CUDA_API_PTDS(cuMemcpyDtoA_v2)
- #define cuMemcpyAtoD __CUDA_API_PTDS(cuMemcpyAtoD_v2)
- #define cuMemcpyHtoA __CUDA_API_PTDS(cuMemcpyHtoA_v2)
- #define cuMemcpyAtoH __CUDA_API_PTDS(cuMemcpyAtoH_v2)
- #define cuMemcpyAtoA __CUDA_API_PTDS(cuMemcpyAtoA_v2)
- #define cuMemcpyHtoAAsync __CUDA_API_PTSZ(cuMemcpyHtoAAsync_v2) /* in this range, names ending in Async use the PTSZ wrapper; all others use PTDS */
- #define cuMemcpyAtoHAsync __CUDA_API_PTSZ(cuMemcpyAtoHAsync_v2)
- #define cuMemcpy2D __CUDA_API_PTDS(cuMemcpy2D_v2)
- #define cuMemcpy2DUnaligned __CUDA_API_PTDS(cuMemcpy2DUnaligned_v2)
- #define cuMemcpy3D __CUDA_API_PTDS(cuMemcpy3D_v2)
- #define cuMemcpyHtoDAsync __CUDA_API_PTSZ(cuMemcpyHtoDAsync_v2)
- #define cuMemcpyDtoHAsync __CUDA_API_PTSZ(cuMemcpyDtoHAsync_v2)
- #define cuMemcpyDtoDAsync __CUDA_API_PTSZ(cuMemcpyDtoDAsync_v2)
- #define cuMemcpy2DAsync __CUDA_API_PTSZ(cuMemcpy2DAsync_v2)
- #define cuMemcpy3DAsync __CUDA_API_PTSZ(cuMemcpy3DAsync_v2)
- #define cuMemcpyBatchAsync __CUDA_API_PTSZ(cuMemcpyBatchAsync) /* wraps the name as-is: no _v2 suffix on the target */
- #define cuMemcpy3DBatchAsync __CUDA_API_PTSZ(cuMemcpy3DBatchAsync) /* likewise no version suffix on the target */
- #define cuMemsetD8 __CUDA_API_PTDS(cuMemsetD8_v2)
- #define cuMemsetD16 __CUDA_API_PTDS(cuMemsetD16_v2)
- #define cuMemsetD32 __CUDA_API_PTDS(cuMemsetD32_v2)
- #define cuMemsetD2D8 __CUDA_API_PTDS(cuMemsetD2D8_v2)
- #define cuMemsetD2D16 __CUDA_API_PTDS(cuMemsetD2D16_v2)
- #define cuMemsetD2D32 __CUDA_API_PTDS(cuMemsetD2D32_v2)
- #define cuArrayCreate cuArrayCreate_v2 /* Alias table, continued: the name on the left is redirected to the symbol on the right; most targets carry a _v2 suffix and the exceptions are marked. */
- #define cuArrayGetDescriptor cuArrayGetDescriptor_v2
- #define cuArray3DCreate cuArray3DCreate_v2
- #define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
- #define cuTexRefSetAddress cuTexRefSetAddress_v2
- #define cuTexRefGetAddress cuTexRefGetAddress_v2
- #define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
- #define cuCtxDestroy cuCtxDestroy_v2
- #define cuCtxPopCurrent cuCtxPopCurrent_v2
- #define cuCtxPushCurrent cuCtxPushCurrent_v2
- #define cuStreamDestroy cuStreamDestroy_v2
- #define cuEventDestroy cuEventDestroy_v2
- #define cuTexRefSetAddress2D cuTexRefSetAddress2D_v3 /* exception: the target here is _v3, not _v2 */
- #define cuLinkCreate cuLinkCreate_v2
- #define cuLinkAddData cuLinkAddData_v2
- #define cuLinkAddFile cuLinkAddFile_v2
- #define cuMemHostRegister cuMemHostRegister_v2
- #define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
- #define cuStreamBeginCapture __CUDA_API_PTSZ(cuStreamBeginCapture_v2) /* routed through the PTSZ wrapper, so _ptsz may be appended to the target */
- #define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
- #define cuDevicePrimaryCtxReset cuDevicePrimaryCtxReset_v2
- #define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
- #define cuDeviceGetUuid_v2 cuDeviceGetUuid_v2 /* expands to itself: nothing is renamed, but the name becomes visible to #ifdef / defined() */
- #define cuIpcOpenMemHandle cuIpcOpenMemHandle_v2
- #define cuGraphInstantiate cuGraphInstantiateWithFlags /* exception: redirected to a differently named symbol rather than a version suffix */
- #define cuGraphExecUpdate cuGraphExecUpdate_v2
- #define cuGetProcAddress cuGetProcAddress_v2
- #define cuGraphAddKernelNode cuGraphAddKernelNode_v2
- #define cuGraphKernelNodeGetParams cuGraphKernelNodeGetParams_v2
- #define cuGraphKernelNodeSetParams cuGraphKernelNodeSetParams_v2
- #define cuGraphExecKernelNodeSetParams cuGraphExecKernelNodeSetParams_v2
- #define cuStreamWriteValue32 __CUDA_API_PTSZ(cuStreamWriteValue32_v2)
- #define cuStreamWaitValue32 __CUDA_API_PTSZ(cuStreamWaitValue32_v2)
- #define cuStreamWriteValue64 __CUDA_API_PTSZ(cuStreamWriteValue64_v2)
- #define cuStreamWaitValue64 __CUDA_API_PTSZ(cuStreamWaitValue64_v2)
- #define cuStreamBatchMemOp __CUDA_API_PTSZ(cuStreamBatchMemOp_v2)
- #define cuStreamGetCaptureInfo __CUDA_API_PTSZ(cuStreamGetCaptureInfo_v2)
- #define cuStreamGetCaptureInfo_v2 __CUDA_API_PTSZ(cuStreamGetCaptureInfo_v2) /* the explicit _v2 spelling goes through the same wrapper, so both spellings name the same target */
- #if defined(__CUDA_API_PER_THREAD_DEFAULT_STREAM) /* the aliases below exist only when this macro is defined */
- #define cuMemcpy __CUDA_API_PTDS(cuMemcpy) /* PTDS wrapper: used in this block only for the three copies without an Async suffix */
- #define cuMemcpyAsync __CUDA_API_PTSZ(cuMemcpyAsync) /* PTSZ wrapper: used for every other alias in this block */
- #define cuMemcpyPeer __CUDA_API_PTDS(cuMemcpyPeer)
- #define cuMemcpyPeerAsync __CUDA_API_PTSZ(cuMemcpyPeerAsync)
- #define cuMemcpy3DPeer __CUDA_API_PTDS(cuMemcpy3DPeer)
- #define cuMemcpy3DPeerAsync __CUDA_API_PTSZ(cuMemcpy3DPeerAsync)
- #define cuMemPrefetchAsync __CUDA_API_PTSZ(cuMemPrefetchAsync)
- #define cuMemPrefetchAsync_v2 __CUDA_API_PTSZ(cuMemPrefetchAsync_v2)
- #define cuMemsetD8Async __CUDA_API_PTSZ(cuMemsetD8Async)
- #define cuMemsetD16Async __CUDA_API_PTSZ(cuMemsetD16Async)
- #define cuMemsetD32Async __CUDA_API_PTSZ(cuMemsetD32Async)
- #define cuMemsetD2D8Async __CUDA_API_PTSZ(cuMemsetD2D8Async)
- #define cuMemsetD2D16Async __CUDA_API_PTSZ(cuMemsetD2D16Async)
- #define cuMemsetD2D32Async __CUDA_API_PTSZ(cuMemsetD2D32Async)
- #define cuStreamGetPriority __CUDA_API_PTSZ(cuStreamGetPriority)
- #define cuStreamGetId __CUDA_API_PTSZ(cuStreamGetId)
- #define cuStreamGetFlags __CUDA_API_PTSZ(cuStreamGetFlags)
- #define cuStreamGetDevice __CUDA_API_PTSZ(cuStreamGetDevice)
- #define cuStreamGetCtx __CUDA_API_PTSZ(cuStreamGetCtx)
- #define cuStreamGetCtx_v2 __CUDA_API_PTSZ(cuStreamGetCtx_v2)
- #define cuStreamWaitEvent __CUDA_API_PTSZ(cuStreamWaitEvent)
- #define cuStreamEndCapture __CUDA_API_PTSZ(cuStreamEndCapture)
- #define cuStreamIsCapturing __CUDA_API_PTSZ(cuStreamIsCapturing)
- #define cuStreamGetCaptureInfo_v3 __CUDA_API_PTSZ(cuStreamGetCaptureInfo_v3)
- #define cuStreamUpdateCaptureDependencies __CUDA_API_PTSZ(cuStreamUpdateCaptureDependencies)
- #define cuStreamUpdateCaptureDependencies_v2 __CUDA_API_PTSZ(cuStreamUpdateCaptureDependencies_v2)
- #define cuStreamAddCallback __CUDA_API_PTSZ(cuStreamAddCallback)
- #define cuStreamAttachMemAsync __CUDA_API_PTSZ(cuStreamAttachMemAsync)
- #define cuStreamQuery __CUDA_API_PTSZ(cuStreamQuery)
- #define cuStreamSynchronize __CUDA_API_PTSZ(cuStreamSynchronize)
- #define cuEventRecord __CUDA_API_PTSZ(cuEventRecord)
- #define cuEventRecordWithFlags __CUDA_API_PTSZ(cuEventRecordWithFlags)
- #define cuLaunchKernel __CUDA_API_PTSZ(cuLaunchKernel)
- #define cuLaunchKernelEx __CUDA_API_PTSZ(cuLaunchKernelEx)
- #define cuLaunchHostFunc __CUDA_API_PTSZ(cuLaunchHostFunc)
- #define cuGraphicsMapResources __CUDA_API_PTSZ(cuGraphicsMapResources)
- #define cuGraphicsUnmapResources __CUDA_API_PTSZ(cuGraphicsUnmapResources)
- #define cuLaunchCooperativeKernel __CUDA_API_PTSZ(cuLaunchCooperativeKernel)
- #define cuSignalExternalSemaphoresAsync __CUDA_API_PTSZ(cuSignalExternalSemaphoresAsync)
- #define cuWaitExternalSemaphoresAsync __CUDA_API_PTSZ(cuWaitExternalSemaphoresAsync)
- #define cuGraphInstantiateWithParams __CUDA_API_PTSZ(cuGraphInstantiateWithParams)
- #define cuGraphUpload __CUDA_API_PTSZ(cuGraphUpload)
- #define cuGraphLaunch __CUDA_API_PTSZ(cuGraphLaunch)
- #define cuStreamCopyAttributes __CUDA_API_PTSZ(cuStreamCopyAttributes)
- #define cuStreamGetAttribute __CUDA_API_PTSZ(cuStreamGetAttribute)
- #define cuStreamSetAttribute __CUDA_API_PTSZ(cuStreamSetAttribute)
- #define cuMemMapArrayAsync __CUDA_API_PTSZ(cuMemMapArrayAsync)
- #define cuMemFreeAsync __CUDA_API_PTSZ(cuMemFreeAsync)
- #define cuMemAllocAsync __CUDA_API_PTSZ(cuMemAllocAsync)
- #define cuMemAllocFromPoolAsync __CUDA_API_PTSZ(cuMemAllocFromPoolAsync)
- #define cuStreamBeginCaptureToGraph __CUDA_API_PTSZ(cuStreamBeginCaptureToGraph)
- #endif /* defined(__CUDA_API_PER_THREAD_DEFAULT_STREAM) */
- #define cuMemBatchDecompressAsync __CUDA_API_PTSZ(cuMemBatchDecompressAsync) /* defined after the #endif, so it applies unconditionally, like the _v2 PTSZ aliases that precede the #if */
- /**
- * \file cuda.h
- * \brief Header file for the CUDA Toolkit application programming interface.
- *
- * \file cudaGL.h
- * \brief Header file for the OpenGL interoperability functions of the
- * low-level CUDA driver application programming interface.
- *
- * \file cudaD3D9.h
- * \brief Header file for the Direct3D 9 interoperability functions of the
- * low-level CUDA driver application programming interface.
- */
- /**
- * \defgroup CUDA_TYPES Data types used by CUDA driver
- * @{
- */
- /**
- * CUDA API version number
- * NOTE(review): presumably encoded as 1000 * major + 10 * minor, i.e. 12080 = 12.8 - confirm.
- */
- #define CUDA_VERSION 12080
- #ifdef __cplusplus
- extern "C" {
- #endif
- /**
- * CUDA device pointer
- * CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
- * The underlying type is selected at compile time: unsigned long long when
- * _WIN64 or __LP64__ is defined, unsigned int otherwise.
- */
- #if defined(_WIN64) || defined(__LP64__)
- typedef unsigned long long CUdeviceptr_v2; /**< Variant used when _WIN64 or __LP64__ is defined */
- #else
- typedef unsigned int CUdeviceptr_v2; /**< Variant used on all other targets */
- #endif /* _WIN64 || __LP64__ */
- typedef CUdeviceptr_v2 CUdeviceptr; /**< CUDA device pointer (unversioned alias of ::CUdeviceptr_v2) */
- typedef int CUdevice_v1; /**< CUDA device (plain int) */
- typedef CUdevice_v1 CUdevice; /**< CUDA device (unversioned alias of ::CUdevice_v1) */
- typedef struct CUctx_st *CUcontext; /**< A regular context handle */
- typedef struct CUmod_st *CUmodule; /**< CUDA module */
- typedef struct CUfunc_st *CUfunction; /**< CUDA function */
- typedef struct CUlib_st *CUlibrary; /**< CUDA library */
- typedef struct CUkern_st *CUkernel; /**< CUDA kernel */
- typedef struct CUarray_st *CUarray; /**< CUDA array */
- typedef struct CUmipmappedArray_st *CUmipmappedArray; /**< CUDA mipmapped array */
- typedef struct CUtexref_st *CUtexref; /**< CUDA texture reference */
- typedef struct CUsurfref_st *CUsurfref; /**< CUDA surface reference */
- typedef struct CUevent_st *CUevent; /**< CUDA event */
- typedef struct CUstream_st *CUstream; /**< CUDA stream */
- typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */
- typedef unsigned long long CUtexObject_v1; /**< An opaque value that represents a CUDA texture object */
- typedef CUtexObject_v1 CUtexObject; /**< An opaque value that represents a CUDA texture object (unversioned alias of ::CUtexObject_v1) */
- typedef unsigned long long CUsurfObject_v1; /**< An opaque value that represents a CUDA surface object */
- typedef CUsurfObject_v1 CUsurfObject; /**< An opaque value that represents a CUDA surface object (unversioned alias of ::CUsurfObject_v1) */
- typedef struct CUextMemory_st *CUexternalMemory; /**< CUDA external memory */
- typedef struct CUextSemaphore_st *CUexternalSemaphore; /**< CUDA external semaphore */
- typedef struct CUgraph_st *CUgraph; /**< CUDA graph */
- typedef struct CUgraphNode_st *CUgraphNode; /**< CUDA graph node */
- typedef struct CUgraphExec_st *CUgraphExec; /**< CUDA executable graph */
- typedef struct CUmemPoolHandle_st *CUmemoryPool; /**< CUDA memory pool */
- typedef struct CUuserObject_st *CUuserObject; /**< CUDA user object for graphs */
- typedef cuuint64_t CUgraphConditionalHandle; /**< CUDA graph conditional handle (a cuuint64_t value, unlike the struct-pointer handles around it) */
- typedef struct CUgraphDeviceUpdatableNode_st *CUgraphDeviceNode; /**< CUDA graph device node handle */
- typedef struct CUasyncCallbackEntry_st *CUasyncCallbackHandle; /**< CUDA async notification callback handle */
- /*!
- * \typedef typedef struct CUgreenCtx_st* CUgreenCtx
- * A green context handle. This handle can be used safely from only one CPU thread at a time.
- * Created via ::cuGreenCtxCreate
- * The handle type is a pointer to struct CUgreenCtx_st.
- */
- typedef struct CUgreenCtx_st *CUgreenCtx;
- #ifndef CU_UUID_HAS_BEEN_DEFINED /* guard: declare CUuuid only if it has not been declared already */
- #define CU_UUID_HAS_BEEN_DEFINED
- typedef struct CUuuid_st { /**< CUDA definition of UUID */
- char bytes[16]; /**< The UUID value, stored as 16 chars */
- } CUuuid;
- #endif /* CU_UUID_HAS_BEEN_DEFINED */
- /**
- * CUDA IPC handle size
- * Used as the element count of the char arrays inside ::CUmemFabricHandle,
- * ::CUipcEventHandle and ::CUipcMemHandle, so each of those payloads is 64 bytes.
- */
- #define CU_IPC_HANDLE_SIZE 64
- /**
- * Fabric handle - An opaque handle representing a memory allocation
- * that can be exported to processes in same or different nodes. For IPC
- * between processes on different nodes they must be connected via the
- * NVSwitch fabric.
- */
- typedef struct CUmemFabricHandle_st {
- unsigned char data[CU_IPC_HANDLE_SIZE]; /**< Opaque handle payload of ::CU_IPC_HANDLE_SIZE bytes */
- } CUmemFabricHandle_v1;
- typedef CUmemFabricHandle_v1 CUmemFabricHandle; /**< Unversioned alias of ::CUmemFabricHandle_v1 */
- /**
- * CUDA IPC event handle
- */
- typedef struct CUipcEventHandle_st {
- char reserved[CU_IPC_HANDLE_SIZE]; /**< Handle payload of ::CU_IPC_HANDLE_SIZE bytes; the member name marks it as reserved */
- } CUipcEventHandle_v1;
- typedef CUipcEventHandle_v1 CUipcEventHandle; /**< Unversioned alias of ::CUipcEventHandle_v1 */
- /**
- * CUDA IPC mem handle
- */
- typedef struct CUipcMemHandle_st {
- char reserved[CU_IPC_HANDLE_SIZE]; /**< Handle payload of ::CU_IPC_HANDLE_SIZE bytes; the member name marks it as reserved */
- } CUipcMemHandle_v1;
- typedef CUipcMemHandle_v1 CUipcMemHandle; /**< Unversioned alias of ::CUipcMemHandle_v1 */
- /**
- * CUDA IPC memory flags
- * NOTE(review): presumably passed when opening an IPC memory handle (::cuIpcOpenMemHandle) - confirm.
- */
- typedef enum CUipcMem_flags_enum {
- CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1 /**< Automatically enable peer access between remote devices as needed */
- } CUipcMem_flags;
- /**
- * CUDA memory attach flags
- * Each enumerator is a distinct single-bit value (0x1, 0x2, 0x4).
- */
- typedef enum CUmemAttach_flags_enum {
- CU_MEM_ATTACH_GLOBAL = 0x1, /**< Memory can be accessed by any stream on any device */
- CU_MEM_ATTACH_HOST = 0x2, /**< Memory cannot be accessed by any stream on any device */
- CU_MEM_ATTACH_SINGLE = 0x4 /**< Memory can only be accessed by a single stream on the associated device */
- } CUmemAttach_flags;
- /**
- * Context creation flags
- * The scheduling choices occupy the low three bits (see ::CU_CTX_SCHED_MASK);
- * the remaining flags are single bits from 0x08 through 0x80.
- */
- typedef enum CUctx_flags_enum {
- CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */
- CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */
- CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */
- CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
- CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling
- * \deprecated This flag was deprecated as of CUDA 4.0
- * and was replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. */
- CU_CTX_SCHED_MASK = 0x07, /**< Mask covering the scheduling bits (0x01 | 0x02 | 0x04) */
- CU_CTX_MAP_HOST = 0x08, /**< \deprecated This flag was deprecated as of CUDA 11.0
- * and it no longer has any effect. All contexts
- * as of CUDA 3.2 behave as though the flag is enabled. */
- CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */
- CU_CTX_COREDUMP_ENABLE = 0x20, /**< Trigger coredumps from exceptions in this context */
- CU_CTX_USER_COREDUMP_ENABLE= 0x40, /**< Enable user pipe to trigger coredumps in this context */
- CU_CTX_SYNC_MEMOPS = 0x80, /**< Ensure synchronous memory operations on this context will synchronize */
- CU_CTX_FLAGS_MASK = 0xFF /**< Mask covering every flag bit declared in this enum (0x01 through 0x80) */
- } CUctx_flags;
- /**
- * Event scheduling flags
- * The numeric values are the same as those of the CU_CTX_SCHED_* context flags.
- */
- typedef enum CUevent_sched_flags_enum {
- CU_EVENT_SCHED_AUTO = 0x00, /**< Automatic scheduling */
- CU_EVENT_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */
- CU_EVENT_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */
- CU_EVENT_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
- } CUevent_sched_flags;
- /**
- * NVCL event scheduling flags
- * The numeric values are the same as those of the CU_EVENT_SCHED_* flags.
- */
- typedef enum cl_event_flags_enum {
- NVCL_EVENT_SCHED_AUTO = 0x00, /**< Automatic scheduling */
- NVCL_EVENT_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */
- NVCL_EVENT_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */
- NVCL_EVENT_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
- } cl_event_flags;
- /**
- * NVCL context scheduling flags
- * The numeric values are the same as those of the CU_CTX_SCHED_* context flags.
- */
- typedef enum cl_context_flags_enum {
- NVCL_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */
- NVCL_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */
- NVCL_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */
- NVCL_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
- } cl_context_flags;
- /**
- * Stream creation flags
- * ::CU_STREAM_DEFAULT is the zero (no flag set) value.
- */
- typedef enum CUstream_flags_enum {
- CU_STREAM_DEFAULT = 0x0, /**< Default stream flag */
- CU_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */
- } CUstream_flags;
- /**
- * Legacy stream handle
- *
- * Stream handle that can be passed as a CUstream to use an implicit stream
- * with legacy synchronization behavior.
- *
- * The handle is the constant 0x1 cast to ::CUstream.
- *
- * See details of the \link_sync_behavior
- */
- #define CU_STREAM_LEGACY ((CUstream)0x1)
- /**
- * Per-thread stream handle
- *
- * Stream handle that can be passed as a CUstream to use an implicit stream
- * with per-thread synchronization behavior.
- *
- * The handle is the constant 0x2 cast to ::CUstream.
- *
- * See details of the \link_sync_behavior
- */
- #define CU_STREAM_PER_THREAD ((CUstream)0x2)
- /**
- * Event creation flags
- * The non-default enumerators are distinct single-bit values (0x1, 0x2, 0x4).
- */
- typedef enum CUevent_flags_enum {
- CU_EVENT_DEFAULT = 0x0, /**< Default event flag */
- CU_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */
- CU_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */
- CU_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. ::CU_EVENT_DISABLE_TIMING must also be set */
- } CUevent_flags;
- /**
- * Event record flags
- * NOTE(review): presumably the flags argument of ::cuEventRecordWithFlags - confirm.
- */
- typedef enum CUevent_record_flags_enum {
- CU_EVENT_RECORD_DEFAULT = 0x0, /**< Default event record flag */
- CU_EVENT_RECORD_EXTERNAL = 0x1 /**< When using stream capture, create an event record node
- * instead of the default behavior. This flag is invalid
- * when used outside of capture. */
- } CUevent_record_flags;
- /**
- * Event wait flags
- * NOTE(review): presumably the flags argument of ::cuStreamWaitEvent - confirm.
- */
- typedef enum CUevent_wait_flags_enum {
- CU_EVENT_WAIT_DEFAULT = 0x0, /**< Default event wait flag */
- CU_EVENT_WAIT_EXTERNAL = 0x1 /**< When using stream capture, create an event wait node
- * instead of the default behavior. This flag is invalid
- * when used outside of capture. */
- } CUevent_wait_flags;
- /**
- * Flags for ::cuStreamWaitValue32 and ::cuStreamWaitValue64
- * The values 0x0 through 0x3 name the comparison to perform, while
- * ::CU_STREAM_WAIT_VALUE_FLUSH occupies a separate bit (1<<30).
- */
- typedef enum CUstreamWaitValue_flags_enum {
- CU_STREAM_WAIT_VALUE_GEQ = 0x0, /**< Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit
- values). Note this is a cyclic comparison which ignores wraparound.
- (Default behavior.) */
- CU_STREAM_WAIT_VALUE_EQ = 0x1, /**< Wait until *addr == value. */
- CU_STREAM_WAIT_VALUE_AND = 0x2, /**< Wait until (*addr & value) != 0. */
- CU_STREAM_WAIT_VALUE_NOR = 0x3, /**< Wait until ~(*addr | value) != 0. Support for this operation can be
- queried with ::cuDeviceGetAttribute() and
- ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR. */
- CU_STREAM_WAIT_VALUE_FLUSH = 1<<30 /**< Follow the wait operation with a flush of outstanding remote writes. This
- means that, if a remote write operation is guaranteed to have reached the
- device before the wait can be satisfied, that write is guaranteed to be
- visible to downstream device work. The device is permitted to reorder
- remote writes internally. For example, this flag would be required if
- two remote writes arrive in a defined order, the wait is satisfied by the
- second write, and downstream work needs to observe the first write.
- Support for this operation is restricted to selected platforms and can be
- queried with ::CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES. */
- } CUstreamWaitValue_flags;
- /**
- * Flags for ::cuStreamWriteValue32
- * NOTE(review): presumably these also apply to ::cuStreamWriteValue64 - confirm.
- */
- typedef enum CUstreamWriteValue_flags_enum {
- CU_STREAM_WRITE_VALUE_DEFAULT = 0x0, /**< Default behavior */
- CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1 /**< Permits the write to be reordered with writes which were issued
- before it, as a performance optimization. Normally,
- ::cuStreamWriteValue32 will provide a memory fence before the
- write, which has similar semantics to
- __threadfence_system() but is scoped to the stream
- rather than a CUDA thread.
- This flag is not supported in the v2 API. */
- } CUstreamWriteValue_flags;
- /**
- * Operations for ::cuStreamBatchMemOp
- * The enumerators are not listed in numeric order:
- * ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES has the value 3 but appears last.
- */
- typedef enum CUstreamBatchMemOpType_enum {
- CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1, /**< Represents a ::cuStreamWaitValue32 operation */
- CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2, /**< Represents a ::cuStreamWriteValue32 operation */
- CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4, /**< Represents a ::cuStreamWaitValue64 operation */
- CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5, /**< Represents a ::cuStreamWriteValue64 operation */
- CU_STREAM_MEM_OP_BARRIER = 6, /**< Insert a memory barrier of the specified type */
- CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3 /**< This has the same effect as ::CU_STREAM_WAIT_VALUE_FLUSH, but as a
- standalone operation. */
- } CUstreamBatchMemOpType;
- /**
- * Flags for ::cuStreamMemoryBarrier
- * NOTE(review): presumably the values carried in the flags member of the
- * memoryBarrier variant of ::CUstreamBatchMemOpParams - confirm.
- */
- typedef enum CUstreamMemoryBarrier_flags_enum {
- CU_STREAM_MEMORY_BARRIER_TYPE_SYS = 0x0, /**< System-wide memory barrier. */
- CU_STREAM_MEMORY_BARRIER_TYPE_GPU = 0x1 /**< Limit memory barrier scope to the GPU. */
- } CUstreamMemoryBarrier_flags;
- /**
- * Per-operation parameters for ::cuStreamBatchMemOp
- * A union with one variant struct per kind of operation. Each variant struct
- * starts with an operation member of the same type as the union's own
- * operation member.
- */
- typedef union CUstreamBatchMemOpParams_union {
- CUstreamBatchMemOpType operation; /**< Operation type; repeated as the first member of every variant below */
- struct CUstreamMemOpWaitValueParams_st {
- CUstreamBatchMemOpType operation;
- CUdeviceptr address; /**< Device pointer the operation refers to */
- union {
- cuuint32_t value; /**< 32-bit operand; shares storage with value64 */
- cuuint64_t value64; /**< 64-bit operand; shares storage with value */
- };
- unsigned int flags; /**< NOTE(review): presumably ::CUstreamWaitValue_flags values - confirm */
- CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */
- } waitValue;
- struct CUstreamMemOpWriteValueParams_st {
- CUstreamBatchMemOpType operation;
- CUdeviceptr address; /**< Device pointer the operation refers to */
- union {
- cuuint32_t value; /**< 32-bit operand; shares storage with value64 */
- cuuint64_t value64; /**< 64-bit operand; shares storage with value */
- };
- unsigned int flags; /**< NOTE(review): presumably ::CUstreamWriteValue_flags values - confirm */
- CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */
- } writeValue;
- struct CUstreamMemOpFlushRemoteWritesParams_st {
- CUstreamBatchMemOpType operation;
- unsigned int flags;
- } flushRemoteWrites;
- struct CUstreamMemOpMemoryBarrierParams_st { /**< Only supported in the _v2 API */
- CUstreamBatchMemOpType operation;
- unsigned int flags; /**< NOTE(review): presumably ::CUstreamMemoryBarrier_flags values - confirm */
- } memoryBarrier;
- cuuint64_t pad[6]; /**< Six cuuint64_t elements; as a union member this sets a lower bound on the union's size */
- } CUstreamBatchMemOpParams_v1;
- typedef CUstreamBatchMemOpParams_v1 CUstreamBatchMemOpParams; /**< Unversioned alias of ::CUstreamBatchMemOpParams_v1 */
- typedef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st { /**< Batch memory operation node parameters (v1 layout) */
- CUcontext ctx; /**< Context handle for the node */
- unsigned int count; /**< NOTE(review): presumably the number of entries in paramArray - confirm */
- CUstreamBatchMemOpParams *paramArray; /**< Pointer to ::CUstreamBatchMemOpParams entries */
- unsigned int flags; /**< Flags value for the node */
- } CUDA_BATCH_MEM_OP_NODE_PARAMS_v1;
- typedef CUDA_BATCH_MEM_OP_NODE_PARAMS_v1 CUDA_BATCH_MEM_OP_NODE_PARAMS; /**< Unversioned alias; note that it names the v1 struct */
- /**
- * Batch memory operation node parameters
- * Declares the same four members, with the same types and order, as
- * ::CUDA_BATCH_MEM_OP_NODE_PARAMS_v1.
- */
- typedef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st {
- CUcontext ctx; /**< Context to use for the operations. */
- unsigned int count; /**< Number of operations in paramArray. */
- CUstreamBatchMemOpParams *paramArray; /**< Array of batch memory operations. */
- unsigned int flags; /**< Flags to control the node. */
- } CUDA_BATCH_MEM_OP_NODE_PARAMS_v2;
- /**
- * Occupancy calculator flags
- */
- typedef enum CUoccupancy_flags_enum {
- CU_OCCUPANCY_DEFAULT = 0x0, /**< Default behavior */
- CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1 /**< Assume global caching is enabled and cannot be automatically turned off */
- } CUoccupancy_flags;
- /**
- * Flags for ::cuStreamUpdateCaptureDependencies
- * The two values select between adding to (0x0) and replacing (0x1) the dependency set.
- */
- typedef enum CUstreamUpdateCaptureDependencies_flags_enum {
- CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0x0, /**< Add new nodes to the dependency set */
- CU_STREAM_SET_CAPTURE_DEPENDENCIES = 0x1 /**< Replace the dependency set with the new nodes */
- } CUstreamUpdateCaptureDependencies_flags;
- /**
- * Types of async notification that can be sent
- */
- typedef enum CUasyncNotificationType_enum {
- CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET = 0x1 /**< Over-budget notification; ::CUasyncNotificationInfo has a matching overBudget payload member */
- } CUasyncNotificationType;
- /**
- * Information passed to the user via the async notification callback
- */
- typedef struct CUasyncNotificationInfo_st {
- CUasyncNotificationType type; /**< Type of the notification */
- union {
- struct {
- unsigned long long bytesOverBudget; /**< NOTE(review): presumably the number of bytes over budget, going by the name - confirm */
- } overBudget; /**< Payload member named after ::CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET */
- } info; /**< Per-type payload; overBudget is currently its only member */
- } CUasyncNotificationInfo;
- /**
- * CUDA async notification callback
- * \param info Information describing what actions to take as a result of this trim notification.
- * \param userData Pointer to user defined data provided at registration.
- * \param callback The callback handle associated with this specific callback.
- *
- * The callback returns no value (void).
- */
- typedef void (*CUasyncCallback)(CUasyncNotificationInfo *info, void *userData, CUasyncCallbackHandle callback);
- /**
- * Array formats
- * The enumerators are not listed in numeric order, and the values are not
- * contiguous; always refer to a format by name.
- */
- typedef enum CUarray_format_enum {
- CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
- CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
- CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
- CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
- CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
- CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
- CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
- CU_AD_FORMAT_FLOAT = 0x20, /**< 32-bit floating point */
- CU_AD_FORMAT_NV12 = 0xb0, /**< 8-bit YUV planar format, with 4:2:0 sampling */
- CU_AD_FORMAT_UNORM_INT8X1 = 0xc0, /**< 1 channel unsigned 8-bit normalized integer */
- CU_AD_FORMAT_UNORM_INT8X2 = 0xc1, /**< 2 channel unsigned 8-bit normalized integer */
- CU_AD_FORMAT_UNORM_INT8X4 = 0xc2, /**< 4 channel unsigned 8-bit normalized integer */
- CU_AD_FORMAT_UNORM_INT16X1 = 0xc3, /**< 1 channel unsigned 16-bit normalized integer */
- CU_AD_FORMAT_UNORM_INT16X2 = 0xc4, /**< 2 channel unsigned 16-bit normalized integer */
- CU_AD_FORMAT_UNORM_INT16X4 = 0xc5, /**< 4 channel unsigned 16-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT8X1 = 0xc6, /**< 1 channel signed 8-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT8X2 = 0xc7, /**< 2 channel signed 8-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT8X4 = 0xc8, /**< 4 channel signed 8-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT16X1 = 0xc9, /**< 1 channel signed 16-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT16X2 = 0xca, /**< 2 channel signed 16-bit normalized integer */
- CU_AD_FORMAT_SNORM_INT16X4 = 0xcb, /**< 4 channel signed 16-bit normalized integer */
- CU_AD_FORMAT_BC1_UNORM = 0x91, /**< 4 channel unsigned normalized block-compressed (BC1 compression) format */
- CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92, /**< 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding */
- CU_AD_FORMAT_BC2_UNORM = 0x93, /**< 4 channel unsigned normalized block-compressed (BC2 compression) format */
- CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94, /**< 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding */
- CU_AD_FORMAT_BC3_UNORM = 0x95, /**< 4 channel unsigned normalized block-compressed (BC3 compression) format */
- CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96, /**< 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding */
- CU_AD_FORMAT_BC4_UNORM = 0x97, /**< 1 channel unsigned normalized block-compressed (BC4 compression) format */
- CU_AD_FORMAT_BC4_SNORM = 0x98, /**< 1 channel signed normalized block-compressed (BC4 compression) format */
- CU_AD_FORMAT_BC5_UNORM = 0x99, /**< 2 channel unsigned normalized block-compressed (BC5 compression) format */
- CU_AD_FORMAT_BC5_SNORM = 0x9a, /**< 2 channel signed normalized block-compressed (BC5 compression) format */
- CU_AD_FORMAT_BC6H_UF16 = 0x9b, /**< 3 channel unsigned half-float block-compressed (BC6H compression) format */
- CU_AD_FORMAT_BC6H_SF16 = 0x9c, /**< 3 channel signed half-float block-compressed (BC6H compression) format */
- CU_AD_FORMAT_BC7_UNORM = 0x9d, /**< 4 channel unsigned normalized block-compressed (BC7 compression) format */
- CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e, /**< 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding */
- CU_AD_FORMAT_P010 = 0x9f, /**< 10-bit YUV planar format, with 4:2:0 sampling */
- CU_AD_FORMAT_P016 = 0xa1, /**< 16-bit YUV planar format, with 4:2:0 sampling */
- CU_AD_FORMAT_NV16 = 0xa2, /**< 8-bit YUV planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_P210 = 0xa3, /**< 10-bit YUV planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_P216 = 0xa4, /**< 16-bit YUV planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_YUY2 = 0xa5, /**< 2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_Y210 = 0xa6, /**< 2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_Y216 = 0xa7, /**< 2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling */
- CU_AD_FORMAT_AYUV = 0xa8, /**< 4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_Y410 = 0xa9, /**< 10-bit YUV packed planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_Y416 = 0xb1, /**< 4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling.
- NOTE(review): the name suggests 16-bit while this text says 12-bit - confirm. */
- CU_AD_FORMAT_Y444_PLANAR8 = 0xb2, /**< 3 channel 8-bit YUV planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_Y444_PLANAR10 = 0xb3, /**< 3 channel 10-bit YUV planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4, /**< 3 channel 8-bit YUV semi-planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5, /**< 3 channel 16-bit YUV semi-planar format, with 4:4:4 sampling */
- CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50, /**< 4 channel unorm R10G10B10A2 RGB format */
- CU_AD_FORMAT_MAX = 0x7FFFFFFF /**< Largest positive value of a signed 32-bit integer.
- NOTE(review): presumably a range sentinel rather than a real format - confirm. */
- } CUarray_format;
- /**
- * Texture reference addressing modes
- * The enumerators are numbered consecutively from 0 to 3.
- */
- typedef enum CUaddress_mode_enum {
- CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */
- CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */
- CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */
- CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */
- } CUaddress_mode;
- /**
- * Texture reference filtering modes
- * The two enumerators are numbered 0 and 1.
- */
- typedef enum CUfilter_mode_enum {
- CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */
- CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */
- } CUfilter_mode;
- /**
- * Device properties
- */
- typedef enum CUdevice_attribute_enum {
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */
- CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
- CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
- CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */
- CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */
- CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Typical clock frequency in kilohertz */
- CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */
- CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */
- CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */
- CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */
- CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */
- CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, /**< Maximum 2D layered texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, /**< Maximum 2D layered texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, /**< Maximum layers in a 2D layered texture */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
- CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */
- CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */
- CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */
- CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */
- CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */
- CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, /**< Device is using TCC driver model */
- CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */
- CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */
- CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */
- CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */
- CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device shares a unified address space with the host */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, /**< Maximum layers in a 1D layered texture */
- CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, /**< Deprecated, do not use. */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, /**< Alternate maximum 3D texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */
- CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, /**< PCI domain ID of the device */
- CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, /**< Pitch alignment requirement for textures */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, /**< Maximum cubemap texture width/height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, /**< Maximum cubemap layered texture width/height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, /**< Maximum 1D surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, /**< Maximum 2D surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, /**< Maximum 2D surface height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, /**< Maximum 3D surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, /**< Maximum 3D surface height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, /**< Maximum 3D surface depth */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, /**< Maximum 1D layered surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, /**< Maximum layers in a 1D layered surface */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, /**< Maximum 2D layered surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, /**< Maximum 2D layered surface height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, /**< Maximum layers in a 2D layered surface */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, /**< Maximum cubemap surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, /**< Maximum cubemap layered surface width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, /**< Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead. */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, /**< Maximum 2D linear texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, /**< Maximum 2D linear texture height */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, /**< Maximum 2D linear texture pitch in bytes */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, /**< Maximum mipmapped 2D texture height */
- CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, /**< Major compute capability version number */
- CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, /**< Minor compute capability version number */
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */
- CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, /**< Device supports stream priorities */
- CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, /**< Device supports caching globals in L1 */
- CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, /**< Device supports caching locals in L1 */
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, /**< Maximum shared memory available per multiprocessor in bytes */
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, /**< Maximum number of 32-bit registers available per multiprocessor */
- CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, /**< Device can allocate managed memory on this system */
- CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, /**< Device is on a multi-GPU board */
- CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, /**< Unique id for a group of devices on the same multi-GPU board */
- CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/
- CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
- CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
- CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, /**< Device can coherently access managed memory concurrently with the CPU */
- CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, /**< Device supports compute preemption. */
- CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, /**< Device can access host registered memory at the same virtual address as the CPU */
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = 92, /**< Deprecated, along with v1 MemOps API, ::cuStreamBatchMemOp and related APIs are supported. */
- CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = 93, /**< Deprecated, along with v1 MemOps API, 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. */
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = 94, /**< Deprecated, along with v1 MemOps API, ::CU_STREAM_WAIT_VALUE_NOR is supported. */
- CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95, /**< Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel */
- CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, /**< Deprecated, ::cuLaunchCooperativeKernelMultiDevice is deprecated. */
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, /**< Maximum optin shared memory per block */
- CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98, /**< The ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. */
- CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99, /**< Device supports host memory registration via ::cudaHostRegister. */
- CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, /**< Device accesses pageable memory via the host's page tables. */
- CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, /**< The host can directly access managed memory on the device without migration. */
- CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102, /**< Deprecated, Use CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED*/
- CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102, /**< Device supports virtual memory management APIs like ::cuMemAddressReserve, ::cuMemCreate, ::cuMemMap and related APIs */
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, /**< Device supports exporting memory to a posix file descriptor with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate */
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, /**< Device supports exporting memory to a Win32 NT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate */
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, /**< Device supports exporting memory to a Win32 KMT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate */
- CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106, /**< Maximum number of blocks per multiprocessor */
- CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107, /**< Device supports compression of memory */
- CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108, /**< Maximum L2 persisting lines capacity setting in bytes. */
- CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109, /**< Maximum value of CUaccessPolicyWindow::num_bytes. */
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110, /**< Device supports specifying the GPUDirect RDMA flag with ::cuMemCreate */
- CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111, /**< Shared memory reserved by CUDA driver per block in bytes */
- CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112, /**< Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays */
- CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113, /**< Device supports using the ::cuMemHostRegister flag ::CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU */
- CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114, /**< External timeline semaphore interop is supported on the device */
- CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115, /**< Device supports using the ::cuMemAllocAsync and ::cuMemPool family of APIs */
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116, /**< Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information) */
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117, /**< The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the ::CUflushGPUDirectRDMAWritesOptions enum */
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118, /**< GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::CUGPUDirectRDMAWritesOrdering for the numerical values returned here. */
- CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119, /**< Handle types supported with mempool based IPC */
- CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = 120, /**< Indicates device supports cluster launch */
- CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121, /**< Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays */
- CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 122, /**< 64-bit operations are supported in ::cuStreamBatchMemOp and related MemOp APIs. */
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 123, /**< ::CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs. */
- CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124, /**< Device supports buffer sharing with dma_buf mechanism. */
- CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = 125, /**< Device supports IPC Events. */
- CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = 126, /**< Number of memory domains the device supports. */
- CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = 127, /**< Device supports accessing memory using Tensor Map. */
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = 128, /**< Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or requested with cuMemCreate() */
- CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = 129, /**< Device supports unified function pointers. */
- CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130, /**< NUMA configuration of a device: value is of type ::CUdeviceNumaConfig enum */
- CU_DEVICE_ATTRIBUTE_NUMA_ID = 131, /**< NUMA node ID of the GPU memory */
- CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132, /**< Device supports switch multicast and reduction operations. */
- CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133, /**< Indicates if contexts created on this device will be shared via MPS */
- CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134, /**< NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA. */
- CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135, /**< Device supports CIG with D3D12. */
- CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK = 136, /**< The returned valued shall be interpreted as a bitmask, where the individual bits are described by the ::CUmemDecompressAlgorithm enum. */
- CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH = 137, /**< The returned valued is the maximum length in bytes of a single decompress operation that is allowed. */
- CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID = 139, /**< The combined 16-bit PCI device ID and 16-bit PCI vendor ID. */
- CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID = 140, /**< The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID. */
- CU_DEVICE_ATTRIBUTE_HOST_NUMA_MULTINODE_IPC_SUPPORTED = 143, /**< Device supports HOST_NUMA location IPC between nodes in a multi-node system. */
- CU_DEVICE_ATTRIBUTE_MAX
- } CUdevice_attribute;
- /**
- * Legacy device properties
- *
- * Every member is a plain int (two of them are 3-element int arrays).
- * The struct is named ::CUdevprop_v1 and is also exposed under the
- * unversioned alias ::CUdevprop by the typedef that follows it.
- */
- typedef struct CUdevprop_st {
- int maxThreadsPerBlock; /**< Maximum number of threads per block */
- int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
- int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
- int sharedMemPerBlock; /**< Shared memory available per block in bytes */
- int totalConstantMemory; /**< Constant memory available on device in bytes */
- int SIMDWidth; /**< Warp size in threads */
- int memPitch; /**< Maximum pitch in bytes allowed by memory copies */
- int regsPerBlock; /**< 32-bit registers available per block */
- int clockRate; /**< Clock frequency in kilohertz */
- int textureAlign; /**< Alignment requirement for textures */
- } CUdevprop_v1;
- typedef CUdevprop_v1 CUdevprop; /**< Unversioned alias of ::CUdevprop_v1 */
- /**
- * Pointer information
- *
- * Identifiers for the attributes of a pointer. The enumerators are
- * numbered consecutively from 1 through 21; no enumerator has the value 0.
- */
- typedef enum CUpointer_attribute_enum {
- CU_POINTER_ATTRIBUTE_CONTEXT = 1, /**< The ::CUcontext on which a pointer was allocated or registered */
- CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2, /**< The ::CUmemorytype describing the physical location of a pointer */
- CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3, /**< The address at which a pointer's memory may be accessed on the device */
- CU_POINTER_ATTRIBUTE_HOST_POINTER = 4, /**< The address at which a pointer's memory may be accessed on the host */
- CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5, /**< A pair of tokens for use with the nv-p2p.h Linux kernel interface */
- CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6, /**< Synchronize every synchronous memory operation initiated on this region */
- CU_POINTER_ATTRIBUTE_BUFFER_ID = 7, /**< A process-wide unique ID for an allocated memory region */
- CU_POINTER_ATTRIBUTE_IS_MANAGED = 8, /**< Indicates if the pointer points to managed memory */
- CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9, /**< A device ordinal of a device on which a pointer was allocated or registered */
- CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10, /**< 1 if this pointer maps to an allocation that is suitable for ::cudaIpcGetMemHandle, 0 otherwise */
- CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11, /**< Starting address for this requested pointer */
- CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12, /**< Size of the address range for this requested pointer */
- CU_POINTER_ATTRIBUTE_MAPPED = 13, /**< 1 if this pointer is in a valid address range that is mapped to a backing allocation, 0 otherwise */
- CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14, /**< Bitmask of allowed ::CUmemAllocationHandleType for this allocation */
- CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15, /**< 1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API */
- CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16, /**< Returns the access flags the device associated with the current context has on the corresponding memory referenced by the pointer given */
- CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17, /**< Returns the mempool handle for the allocation if it was allocated from a mempool. Otherwise returns NULL. */
- CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18, /**< Size of the actual underlying mapping that the pointer belongs to */
- CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19, /**< The start address of the mapping that the pointer belongs to */
- CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20 /**< A process-wide unique id corresponding to the physical allocation the pointer belongs to */
- , CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE = 21 /**< Returns in \p *data a boolean that indicates whether the pointer points to memory that is capable of being used for hardware accelerated decompression. */
- } CUpointer_attribute;
- /**
- * Function properties
- *
- * Identifiers for the attributes of a function. The enumerators are
- * numbered consecutively from 0 through 15 and are followed by
- * ::CU_FUNC_ATTRIBUTE_MAX, which carries no explicit value.
- */
- typedef enum CUfunction_attribute_enum {
- /**
- * The maximum number of threads per block, beyond which a launch of the
- * function would fail. This number depends on both the function and the
- * device on which the function is currently loaded.
- */
- CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
- /**
- * The size in bytes of statically-allocated shared memory required by
- * this function. This does not include dynamically-allocated shared
- * memory requested by the user at runtime.
- */
- CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
- /**
- * The size in bytes of user-allocated constant memory required by this
- * function.
- */
- CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
- /**
- * The size in bytes of local memory used by each thread of this function.
- */
- CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
- /**
- * The number of registers used by each thread of this function.
- */
- CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
- /**
- * The PTX virtual architecture version for which the function was
- * compiled. This value is the major PTX version * 10 + the minor PTX
- * version, so a PTX version 1.3 function would return the value 13.
- * Note that this may return the undefined value of 0 for cubins
- * compiled prior to CUDA 3.0.
- */
- CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
- /**
- * The binary architecture version for which the function was compiled.
- * This value is the major binary version * 10 + the minor binary version,
- * so a binary version 1.3 function would return the value 13. Note that
- * this will return a value of 10 for legacy cubins that do not have a
- * properly-encoded binary architecture version.
- */
- CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
- /**
- * The attribute to indicate whether the function has been compiled with
- * user specified option "-Xptxas --dlcm=ca" set.
- */
- CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
- /**
- * The maximum size in bytes of dynamically-allocated shared memory that can be used by
- * this function. If the user-specified dynamic shared memory size is larger than this
- * value, the launch will fail.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
- /**
- * On devices where the L1 cache and shared memory use the same hardware resources,
- * this sets the shared memory carveout preference, in percent of the total shared memory.
- * Refer to ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
- * This is only a hint, and the driver can choose a different ratio if required to execute the function.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
- /**
- * If this attribute is set, the kernel must launch with a valid cluster
- * size specified.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = 10,
- /**
- * The required cluster width in blocks. The values must either all be 0 or
- * all be positive. The validity of the cluster dimensions is otherwise
- * checked at launch time.
- *
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = 11,
- /**
- * The required cluster height in blocks. The values must either all be 0 or
- * all be positive. The validity of the cluster dimensions is otherwise
- * checked at launch time.
- *
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = 12,
- /**
- * The required cluster depth in blocks. The values must either all be 0 or
- * all be positive. The validity of the cluster dimensions is otherwise
- * checked at launch time.
- *
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = 13,
- /**
- * Whether the function can be launched with non-portable cluster size. 1 is
- * allowed, 0 is disallowed. A non-portable cluster size may only function
- * on the specific SKUs the program is tested on. The launch might fail if
- * the program is run on a different hardware platform.
- *
- * CUDA API provides cudaOccupancyMaxActiveClusters to assist with checking
- * whether the desired size can be launched on the current device.
- *
- * Portable Cluster Size
- *
- * A portable cluster size is guaranteed to be functional on all compute
- * capabilities higher than the target compute capability. The portable
- * cluster size for sm_90 is 8 blocks per cluster. This value may increase
- * for future compute capabilities.
- *
- * The specific hardware unit may support higher cluster sizes that are not
- * guaranteed to be portable.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = 14,
- /**
- * The block scheduling policy of a function. The value type is
- * CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy.
- * See ::cuFuncSetAttribute, ::cuKernelSetAttribute
- */
- CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 15,
- CU_FUNC_ATTRIBUTE_MAX /**< No explicit value: takes the value following the preceding enumerator (16) */
- } CUfunction_attribute;
- /**
- * Function cache configurations
- *
- * Preference between shared memory and L1 cache size; values 0x00 through 0x03.
- */
- typedef enum CUfunc_cache_enum {
- CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< No preference for shared memory or L1 (default) */
- CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< Prefer larger shared memory and smaller L1 cache */
- CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< Prefer larger L1 cache and smaller shared memory */
- CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< Prefer equal sized L1 cache and shared memory */
- } CUfunc_cache;
- /**
- * \deprecated
- *
- * Shared memory configurations
- *
- * Selects the shared memory bank size: the default, four bytes, or eight bytes.
- */
- typedef enum CUsharedconfig_enum {
- CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< Set default shared memory bank size */
- CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< Set shared memory bank width to four bytes */
- CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< Set shared memory bank width to eight bytes */
- } CUsharedconfig;
- /**
- * Shared memory carveout configurations. These may be passed to ::cuFuncSetAttribute or ::cuKernelSetAttribute
- *
- * The enumerators are not listed in numeric order: the values are -1, 100 and 0.
- * NOTE(review): 0 and 100 presumably correspond to the percent scale described
- * for ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT - confirm.
- */
- typedef enum CUshared_carveout_enum {
- CU_SHAREDMEM_CARVEOUT_DEFAULT = -1, /**< No preference for shared memory or L1 (default) */
- CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100, /**< Prefer maximum available shared memory, minimum L1 cache */
- CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0 /**< Prefer maximum available L1 cache, minimum shared memory */
- } CUshared_carveout;
- /**
- * Memory types
- *
- * Values run from 0x01 through 0x04; no enumerator has the value 0.
- */
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */
- CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */
- CU_MEMORYTYPE_ARRAY = 0x03, /**< Array memory */
- CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */
- } CUmemorytype;
- /**
- * Compute Modes
- *
- * The values are 0, 2 and 3; no enumerator in this enum has the value 1.
- */
- typedef enum CUcomputemode_enum {
- CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */
- CU_COMPUTEMODE_PROHIBITED = 2, /**< Compute-prohibited mode (No contexts can be created on this device at this time) */
- CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 /**< Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */
- } CUcomputemode;
- /**
- * Memory advise values
- *
- * Each SET value is immediately followed by its UNSET counterpart
- * (1/2, 3/4 and 5/6).
- */
- typedef enum CUmem_advise_enum {
- CU_MEM_ADVISE_SET_READ_MOSTLY = 1, /**< Data will mostly be read and only occasionally be written to */
- CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2, /**< Undo the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY */
- CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3, /**< Set the preferred location for the data as the specified device */
- CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4, /**< Clear the preferred location for the data */
- CU_MEM_ADVISE_SET_ACCESSED_BY = 5, /**< Data will be accessed by the specified device, so prevent page faults as much as possible */
- CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6 /**< Let the Unified Memory subsystem decide on the page faulting policy for the specified device */
- } CUmem_advise;
- typedef enum CUmem_range_attribute_enum { /* Memory range attributes, numbered consecutively from 1 through 8 */
- CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1, /**< Whether the range will mostly be read and only occasionally be written to */
- CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2, /**< The preferred location of the range */
- CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3, /**< Memory range has ::CU_MEM_ADVISE_SET_ACCESSED_BY set for the specified device */
- CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4 /**< The last location to which the range was prefetched */
- , CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE = 5 /**< The preferred location type of the range */
- , CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID = 6 /**< The preferred location id of the range */
- , CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE = 7 /**< The last location type to which the range was prefetched */
- , CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID = 8 /**< The last location id to which the range was prefetched */
- } CUmem_range_attribute;
- /**
- * Online compiler and linker options
- */
- typedef enum CUjit_option_enum
- {
- /**
- * Max number of registers that a thread may use.\n
- * Option type: unsigned int\n
- * Applies to: compiler only
- */
- CU_JIT_MAX_REGISTERS = 0,
- /**
- * IN: Specifies minimum number of threads per block to target compilation
- * for\n
- * OUT: Returns the number of threads the compiler actually targeted.
- * This restricts the resource utilization of the compiler (e.g. max
- * registers) such that a block with the given number of threads should be
- * able to launch based on register limitations. Note, this option does not
- * currently take into account any other resource limitations, such as
- * shared memory utilization.\n
- * Cannot be combined with ::CU_JIT_TARGET.\n
- * Option type: unsigned int\n
- * Applies to: compiler only
- */
- CU_JIT_THREADS_PER_BLOCK = 1,
- /**
- * Overwrites the option value with the total wall clock time, in
- * milliseconds, spent in the compiler and linker\n
- * Option type: float\n
- * Applies to: compiler and linker
- */
- CU_JIT_WALL_TIME = 2,
- /**
- * Pointer to a buffer in which to print any log messages
- * that are informational in nature (the buffer size is specified via
- * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
- * Option type: char *\n
- * Applies to: compiler and linker
- */
- CU_JIT_INFO_LOG_BUFFER = 3,
- /**
- * IN: Log buffer size in bytes. Log messages will be capped at this size
- * (including null terminator)\n
- * OUT: Amount of log buffer filled with messages\n
- * Option type: unsigned int\n
- * Applies to: compiler and linker
- */
- CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
- /**
- * Pointer to a buffer in which to print any log messages that
- * reflect errors (the buffer size is specified via option
- * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
- * Option type: char *\n
- * Applies to: compiler and linker
- */
- CU_JIT_ERROR_LOG_BUFFER = 5,
- /**
- * IN: Log buffer size in bytes. Log messages will be capped at this size
- * (including null terminator)\n
- * OUT: Amount of log buffer filled with messages\n
- * Option type: unsigned int\n
- * Applies to: compiler and linker
- */
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
- /**
- * Level of optimizations to apply to generated code (0 - 4), with 4
- * being the default and highest level of optimizations.\n
- * Option type: unsigned int\n
- * Applies to: compiler only
- */
- CU_JIT_OPTIMIZATION_LEVEL = 7,
- /**
- * No option value required. Determines the target based on the current
- * attached context (default)\n
- * Option type: No option value needed\n
- * Applies to: compiler and linker
- */
- CU_JIT_TARGET_FROM_CUCONTEXT = 8,
- /**
- * Target is chosen based on supplied ::CUjit_target. Cannot be
- * combined with ::CU_JIT_THREADS_PER_BLOCK.\n
- * Option type: unsigned int for enumerated type ::CUjit_target\n
- * Applies to: compiler and linker
- */
- CU_JIT_TARGET = 9,
- /**
- * Specifies choice of fallback strategy if matching cubin is not found.
- * Choice is based on supplied ::CUjit_fallback. This option cannot be
- * used with cuLink* APIs as the linker requires exact matches.\n
- * Option type: unsigned int for enumerated type ::CUjit_fallback\n
- * Applies to: compiler only
- */
- CU_JIT_FALLBACK_STRATEGY = 10,
- /**
- * Specifies whether to create debug information in output (-g)
- * (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler and linker
- */
- CU_JIT_GENERATE_DEBUG_INFO = 11,
- /**
- * Generate verbose log messages (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler and linker
- */
- CU_JIT_LOG_VERBOSE = 12,
- /**
- * Generate line number information (-lineinfo) (0: false, default)\n
- * Option type: int\n
- * Applies to: compiler only
- */
- CU_JIT_GENERATE_LINE_INFO = 13,
- /**
- * Specifies whether to enable caching explicitly (-dlcm) \n
- * Choice is based on supplied ::CUjit_cacheMode_enum.\n
- * Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum\n
- * Applies to: compiler only
- */
- CU_JIT_CACHE_MODE = 14,
- /**
- * \deprecated
- * This jit option is deprecated and should not be used.
- */
- CU_JIT_NEW_SM3X_OPT = 15,
- /**
- * This jit option is used for internal purpose only.
- */
- CU_JIT_FAST_COMPILE = 16,
- /**
- * Array of device symbol names that will be relocated to the corresponding
- * host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
- * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
- * When loading a device module, driver will relocate all encountered
- * unresolved symbols to the host addresses.\n
- * It is only allowed to register symbols that correspond to unresolved
- * global variables.\n
- * It is illegal to register the same device symbol at multiple addresses.\n
- * Option type: const char **\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_NAMES = 17,
- /**
- * Array of host addresses that will be used to relocate corresponding
- * device symbols stored in ::CU_JIT_GLOBAL_SYMBOL_NAMES.\n
- * Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
- * Option type: void **\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 18,
- /**
- * Number of entries in ::CU_JIT_GLOBAL_SYMBOL_NAMES and
- * ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n
- * Option type: unsigned int\n
- * Applies to: dynamic linker only
- */
- CU_JIT_GLOBAL_SYMBOL_COUNT = 19,
- /**
- * \deprecated
- * Enable link-time optimization (-dlto) for device code (Disabled by default).\n
- * This option is not supported on 32-bit platforms.\n
- * Option type: int\n
- * Applies to: compiler and linker
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_LTO = 20,
- /**
- * \deprecated
- * Control single-precision denormals (-ftz) support (0: false, default).
- * 1 : flushes denormal values to zero
- * 0 : preserves denormal values
- * Option type: int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_FTZ = 21,
- /**
- * \deprecated
- * Control single-precision floating-point division and reciprocals
- * (-prec-div) support (1: true, default).
- * 1 : Enables the IEEE round-to-nearest mode
- * 0 : Enables the fast approximation mode
- * Option type: int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_PREC_DIV = 22,
- /**
- * \deprecated
- * Control single-precision floating-point square root
- * (-prec-sqrt) support (1: true, default).
- * 1 : Enables the IEEE round-to-nearest mode
- * 0 : Enables the fast approximation mode
- * Option type: int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_PREC_SQRT = 23,
- /**
- * \deprecated
- * Enable/Disable the contraction of floating-point multiplies
- * and adds/subtracts into floating-point multiply-add (-fma)
- * operations (1: Enable, default; 0: Disable).
- * Option type: int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_FMA = 24,
- /**
- * \deprecated
- * Array of kernel names that should be preserved at link time while others
- * can be removed.\n
- * Must contain ::CU_JIT_REFERENCED_KERNEL_COUNT entries.\n
- * Note that kernel names can be mangled by the compiler in which case the
- * mangled name needs to be specified.\n
- * Wildcard "*" can be used to represent zero or more characters instead of
- * specifying the full or mangled name.\n
- * It is important to note that the wildcard "*" is also added implicitly.
- * For example, specifying "foo" will match "foobaz", "barfoo", "barfoobaz" and
- * thus preserve all kernels with those names. This can be avoided by providing
- * a more specific name like "barfoobaz".\n
- * Option type: const char **\n
- * Applies to: dynamic linker only
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_REFERENCED_KERNEL_NAMES = 25,
- /**
- * \deprecated
- * Number of entries in ::CU_JIT_REFERENCED_KERNEL_NAMES array.\n
- * Option type: unsigned int\n
- * Applies to: dynamic linker only
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_REFERENCED_KERNEL_COUNT = 26,
- /**
- * \deprecated
- * Array of variable names (__device__ and/or __constant__) that should be
- * preserved at link time while others can be removed.\n
- * Must contain ::CU_JIT_REFERENCED_VARIABLE_COUNT entries.\n
- * Note that variable names can be mangled by the compiler in which case the
- * mangled name needs to be specified.\n
- * Wildcard "*" can be used to represent zero or more characters instead of
- * specifying the full or mangled name.\n
- * It is important to note that the wildcard "*" is also added implicitly.
- * For example, specifying "foo" will match "foobaz", "barfoo", "barfoobaz" and
- * thus preserve all variables with those names. This can be avoided by providing
- * a more specific name like "barfoobaz".\n
- * Option type: const char **\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_REFERENCED_VARIABLE_NAMES = 27,
- /**
- * \deprecated
- * Number of entries in ::CU_JIT_REFERENCED_VARIABLE_NAMES array.\n
- * Option type: unsigned int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_REFERENCED_VARIABLE_COUNT = 28,
- /**
- * \deprecated
- * This option serves as a hint to enable the JIT compiler/linker
- * to remove constant (__constant__) and device (__device__) variables
- * unreferenced in device code (Disabled by default).\n
- * Note that host references to constant and device variables using APIs like
- * ::cuModuleGetGlobal() with this option specified may result in undefined behavior unless
- * the variables are explicitly specified using ::CU_JIT_REFERENCED_VARIABLE_NAMES.\n
- * Option type: int\n
- * Applies to: link-time optimization specified with CU_JIT_LTO
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = 29,
- /**
- * Generate position independent code (0: false)\n
- * Option type: int\n
- * Applies to: compiler only
- */
- CU_JIT_POSITION_INDEPENDENT_CODE = 30,
- /**
- * This option hints to the JIT compiler the minimum number of CTAs from the
- * kernel’s grid to be mapped to a SM. This option is ignored when used together
- * with ::CU_JIT_MAX_REGISTERS or ::CU_JIT_THREADS_PER_BLOCK.
- * Optimizations based on this option need ::CU_JIT_MAX_THREADS_PER_BLOCK to
- * be specified as well. For kernels already using PTX directive .minnctapersm,
- * this option will be ignored by default. Use ::CU_JIT_OVERRIDE_DIRECTIVE_VALUES
- * to let this option take precedence over the PTX directive.
- * Option type: unsigned int\n
- * Applies to: compiler only
- */
- CU_JIT_MIN_CTA_PER_SM = 31,
- /**
- * Maximum number of threads in a thread block, computed as the product of
- * the maximum extent specified for each dimension of the block. This limit
- * is guaranteed not to be exceeded in any invocation of the kernel. Exceeding
- * the maximum number of threads results in a runtime error or kernel launch
- * failure. For kernels already using PTX directive .maxntid, this option will
- * be ignored by default. Use ::CU_JIT_OVERRIDE_DIRECTIVE_VALUES to let this
- * option take precedence over the PTX directive.
- * Option type: int\n
- * Applies to: compiler only
- */
- CU_JIT_MAX_THREADS_PER_BLOCK = 32,
- /**
- * This option lets the values specified using ::CU_JIT_MAX_REGISTERS,
- * ::CU_JIT_THREADS_PER_BLOCK, ::CU_JIT_MAX_THREADS_PER_BLOCK and
- * ::CU_JIT_MIN_CTA_PER_SM take precedence over any PTX directives.
- * (0: Disable, default; 1: Enable)
- * Option type: int\n
- * Applies to: compiler only
- */
- CU_JIT_OVERRIDE_DIRECTIVE_VALUES = 33,
- CU_JIT_NUM_OPTIONS
- } CUjit_option;
- /*
- * Offset added to a CU_TARGET_COMPUTE_xx value to form the matching
- * accelerated target (e.g. ::CU_TARGET_COMPUTE_90A is this base plus
- * ::CU_TARGET_COMPUTE_90). Indicates that the compute device class supports
- * accelerated features.
- */
- #define CU_COMPUTE_ACCELERATED_TARGET_BASE 0x10000
- /**
- * Online compilation targets
- *
- * Plain targets are numbered after the compute device class as
- * major * 10 + minor (e.g. 3.0 -> 30, 10.1 -> 101). Targets with an "A"
- * suffix are the same number offset by ::CU_COMPUTE_ACCELERATED_TARGET_BASE.
- */
- typedef enum CUjit_target_enum
- {
- CU_TARGET_COMPUTE_30 = 30, /**< Compute device class 3.0 */
- CU_TARGET_COMPUTE_32 = 32, /**< Compute device class 3.2 */
- CU_TARGET_COMPUTE_35 = 35, /**< Compute device class 3.5 */
- CU_TARGET_COMPUTE_37 = 37, /**< Compute device class 3.7 */
- CU_TARGET_COMPUTE_50 = 50, /**< Compute device class 5.0 */
- CU_TARGET_COMPUTE_52 = 52, /**< Compute device class 5.2 */
- CU_TARGET_COMPUTE_53 = 53, /**< Compute device class 5.3 */
- CU_TARGET_COMPUTE_60 = 60, /**< Compute device class 6.0 */
- CU_TARGET_COMPUTE_61 = 61, /**< Compute device class 6.1 */
- CU_TARGET_COMPUTE_62 = 62, /**< Compute device class 6.2 */
- CU_TARGET_COMPUTE_70 = 70, /**< Compute device class 7.0 */
- CU_TARGET_COMPUTE_72 = 72, /**< Compute device class 7.2 */
- CU_TARGET_COMPUTE_75 = 75, /**< Compute device class 7.5 */
- CU_TARGET_COMPUTE_80 = 80, /**< Compute device class 8.0 */
- CU_TARGET_COMPUTE_86 = 86, /**< Compute device class 8.6 */
- CU_TARGET_COMPUTE_87 = 87, /**< Compute device class 8.7 */
- CU_TARGET_COMPUTE_89 = 89, /**< Compute device class 8.9 */
- CU_TARGET_COMPUTE_90 = 90, /**< Compute device class 9.0 */
- CU_TARGET_COMPUTE_100 = 100, /**< Compute device class 10.0 */
- CU_TARGET_COMPUTE_101 = 101, /**< Compute device class 10.1 */
- CU_TARGET_COMPUTE_120 = 120, /**< Compute device class 12.0 */
- /** Compute device class 9.0 with accelerated features. */
- CU_TARGET_COMPUTE_90A = CU_COMPUTE_ACCELERATED_TARGET_BASE + CU_TARGET_COMPUTE_90,
- /** Compute device class 10.0 with accelerated features. */
- CU_TARGET_COMPUTE_100A = CU_COMPUTE_ACCELERATED_TARGET_BASE + CU_TARGET_COMPUTE_100,
- /** Compute device class 10.1 with accelerated features. */
- CU_TARGET_COMPUTE_101A = CU_COMPUTE_ACCELERATED_TARGET_BASE + CU_TARGET_COMPUTE_101,
- /** Compute device class 12.0 with accelerated features. */
- CU_TARGET_COMPUTE_120A = CU_COMPUTE_ACCELERATED_TARGET_BASE + CU_TARGET_COMPUTE_120,
- } CUjit_target;
- /**
- * Cubin matching fallback strategies
- *
- * Selects what is preferred when no exact binary match is found.
- */
- typedef enum CUjit_fallback_enum
- {
- CU_PREFER_PTX = 0, /**< Prefer to compile PTX if an exact binary match is not found */
- CU_PREFER_BINARY /**< Prefer to fall back to compatible binary code if an exact match is not found (implicit value 1) */
- } CUjit_fallback;
- /**
- * Caching modes for dlcm
- *
- * Each value selects whether a -dlcm flag is used for compilation and, if
- * so, whether the L1 cache is enabled.
- */
- typedef enum CUjit_cacheMode_enum
- {
- CU_JIT_CACHE_OPTION_NONE = 0, /**< Compile with no -dlcm flag specified */
- CU_JIT_CACHE_OPTION_CG, /**< Compile with L1 cache disabled (implicit value 1) */
- CU_JIT_CACHE_OPTION_CA /**< Compile with L1 cache enabled (implicit value 2) */
- } CUjit_cacheMode;
- /**
- * Device code formats
- *
- * Each value names one kind of input and lists the option groups that
- * apply to it.
- */
- typedef enum CUjitInputType_enum
- {
- /**
- * Compiled device-class-specific device code\n
- * Applicable options: none
- */
- CU_JIT_INPUT_CUBIN = 0,
- /**
- * PTX source code\n
- * Applicable options: PTX compiler options
- */
- CU_JIT_INPUT_PTX = 1,
- /**
- * Bundle of multiple cubins and/or PTX of some device code\n
- * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
- */
- CU_JIT_INPUT_FATBINARY = 2,
- /**
- * Host object with embedded device code\n
- * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
- */
- CU_JIT_INPUT_OBJECT = 3,
- /**
- * Archive of host objects with embedded device code\n
- * Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
- */
- CU_JIT_INPUT_LIBRARY = 4,
- /**
- * \deprecated
- * High-level intermediate code for link-time optimization\n
- * Applicable options: NVVM compiler options, PTX compiler options
- *
- * Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- */
- CU_JIT_INPUT_NVVM = 5,
- CU_JIT_NUM_INPUT_TYPES = 6 /**< Number of input type values listed above (0 through 5) */
- } CUjitInputType;
- typedef struct CUlinkState_st *CUlinkState; /**< Handle type: pointer to struct CUlinkState_st, whose members are not defined in this section of the header */
- /**
- * Flags to register a graphics resource
- *
- * The non-zero values are distinct single bits (0x01, 0x02, 0x04, 0x08).
- * NOTE(review): presumably they may be OR-ed together - confirm against the
- * API that consumes them.
- */
- typedef enum CUgraphicsRegisterFlags_enum {
- CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
- CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
- CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
- CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
- CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08
- } CUgraphicsRegisterFlags;
- /**
- * Flags for mapping and unmapping interop resources
- *
- * The non-zero values are distinct single bits (0x01, 0x02).
- * NOTE(review): presumably they may be OR-ed together - confirm against the
- * API that consumes them.
- */
- typedef enum CUgraphicsMapResourceFlags_enum {
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
- } CUgraphicsMapResourceFlags;
- /**
- * Array indices for cube faces
- *
- * Faces are numbered 0 through 5 in the order +X, -X, +Y, -Y, +Z, -Z.
- */
- typedef enum CUarray_cubemap_face_enum {
- CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */
- CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */
- CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */
- CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */
- CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */
- CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */
- } CUarray_cubemap_face;
- /**
- * Limits
- *
- * Identifiers for per-context limits. Some entries are documented below as
- * query-only.
- */
- typedef enum CUlimit_enum {
- CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */
- CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */
- CU_LIMIT_MALLOC_HEAP_SIZE = 0x02, /**< GPU malloc heap size */
- CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03, /**< GPU device runtime launch synchronize depth */
- CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04, /**< GPU device runtime pending launch count */
- CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x05, /**< A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in bytes). This is a hint */
- CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x06, /**< A size in bytes for L2 persisting lines cache size */
- CU_LIMIT_SHMEM_SIZE = 0x07, /**< A maximum size in bytes of shared memory available to CUDA kernels on a CIG context. Can only be queried, cannot be set */
- CU_LIMIT_CIG_ENABLED = 0x08, /**< A non-zero value indicates this CUDA context is a CIG-enabled context. Can only be queried, cannot be set */
- CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 0x09, /**< When set to zero, CUDA will fail to launch a kernel on a CIG context, instead of using the fallback path, if the kernel uses more shared memory than available */
- CU_LIMIT_MAX /**< One past the last limit listed above (implicit value 0x0A) */
- } CUlimit;
- /**
- * Resource types
- *
- * Distinguishes the four kinds of resource listed below (values 0 to 3).
- */
- typedef enum CUresourcetype_enum {
- CU_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resource */
- CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
- CU_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
- CU_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
- } CUresourcetype;
- #ifdef _WIN32
- #define CUDA_CB __stdcall /* When _WIN32 is defined, callbacks are declared with the __stdcall calling convention */
- #else
- #define CUDA_CB /* Otherwise expands to nothing, leaving the compiler's default calling convention */
- #endif
- /**
- * CUDA host function
- *
- * Pointer to a callback that takes a single \c void* argument and returns
- * nothing, declared with the ::CUDA_CB calling convention.
- * \param userData Argument value passed to the function
- */
- typedef void (CUDA_CB *CUhostFn)(void *userData);
- /**
- * Specifies performance hint with ::CUaccessPolicyWindow for hitProp and missProp members.
- */
- typedef enum CUaccessProperty_enum {
- CU_ACCESS_PROPERTY_NORMAL = 0, /**< Normal cache persistence. */
- CU_ACCESS_PROPERTY_STREAMING = 1, /**< Streaming access is less likely to persist in cache. */
- CU_ACCESS_PROPERTY_PERSISTING = 2 /**< Persisting access is more likely to persist in cache. */
- } CUaccessProperty;
- /**
- * Specifies an access policy for a window, a contiguous extent of memory
- * beginning at base_ptr and ending at base_ptr + num_bytes.
- * num_bytes is limited by CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE.
- * The window is partitioned into many segments, which are assigned such that:
- * sum of "hit segments" / window == approx. ratio.
- * sum of "miss segments" / window == approx. 1-ratio.
- * Segments and ratio specifications are fitted to the capabilities of
- * the architecture.
- * Accesses in a hit segment apply the hitProp access policy.
- * Accesses in a miss segment apply the missProp access policy.
- */
- typedef struct CUaccessPolicyWindow_st {
- void *base_ptr; /**< Starting address of the access policy window. CUDA driver may align it. */
- size_t num_bytes; /**< Size in bytes of the window policy. CUDA driver may restrict the maximum size and alignment. */
- float hitRatio; /**< hitRatio specifies percentage of lines assigned hitProp, rest are assigned missProp. */
- CUaccessProperty hitProp; /**< ::CUaccessProperty set for hit. */
- CUaccessProperty missProp; /**< ::CUaccessProperty set for miss. Must be either NORMAL or STREAMING */
- } CUaccessPolicyWindow_v1;
- /**
- * Access policy window
- *
- * Unversioned name; an alias for ::CUaccessPolicyWindow_v1.
- */
- typedef CUaccessPolicyWindow_v1 CUaccessPolicyWindow;
- /**
- * GPU kernel node parameters (version 1)
- *
- * Describes the kernel, its grid and block dimensions, its dynamic shared
- * memory size and its arguments. This version has no \c kern or \c ctx
- * member; those appear in ::CUDA_KERNEL_NODE_PARAMS_v2.
- */
- typedef struct CUDA_KERNEL_NODE_PARAMS_st {
- CUfunction func; /**< Kernel to launch */
- unsigned int gridDimX; /**< Width of grid in blocks */
- unsigned int gridDimY; /**< Height of grid in blocks */
- unsigned int gridDimZ; /**< Depth of grid in blocks */
- unsigned int blockDimX; /**< X dimension of each thread block */
- unsigned int blockDimY; /**< Y dimension of each thread block */
- unsigned int blockDimZ; /**< Z dimension of each thread block */
- unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
- void **kernelParams; /**< Array of pointers to kernel parameters */
- void **extra; /**< Extra options */
- } CUDA_KERNEL_NODE_PARAMS_v1;
- /**
- * GPU kernel node parameters (version 2)
- *
- * Same leading members as ::CUDA_KERNEL_NODE_PARAMS_v1, followed by
- * \c kern and \c ctx. The unversioned name ::CUDA_KERNEL_NODE_PARAMS is an
- * alias for this version.
- */
- typedef struct CUDA_KERNEL_NODE_PARAMS_v2_st {
- CUfunction func; /**< Kernel to launch */
- unsigned int gridDimX; /**< Width of grid in blocks */
- unsigned int gridDimY; /**< Height of grid in blocks */
- unsigned int gridDimZ; /**< Depth of grid in blocks */
- unsigned int blockDimX; /**< X dimension of each thread block */
- unsigned int blockDimY; /**< Y dimension of each thread block */
- unsigned int blockDimZ; /**< Z dimension of each thread block */
- unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
- void **kernelParams; /**< Array of pointers to kernel parameters */
- void **extra; /**< Extra options */
- CUkernel kern; /**< Kernel to launch, will only be referenced if func is NULL */
- CUcontext ctx; /**< Context for the kernel task to run in. The value NULL will indicate the current context should be used by the api. This field is ignored if func is set. */
- } CUDA_KERNEL_NODE_PARAMS_v2;
- typedef CUDA_KERNEL_NODE_PARAMS_v2 CUDA_KERNEL_NODE_PARAMS; /**< Unversioned name; alias for the version 2 layout */
- /**
- * GPU kernel node parameters (version 3)
- *
- * The member list declared here is the same as that of
- * ::CUDA_KERNEL_NODE_PARAMS_v2 (same names, types and order).
- */
- typedef struct CUDA_KERNEL_NODE_PARAMS_v3_st {
- CUfunction func; /**< Kernel to launch */
- unsigned int gridDimX; /**< Width of grid in blocks */
- unsigned int gridDimY; /**< Height of grid in blocks */
- unsigned int gridDimZ; /**< Depth of grid in blocks */
- unsigned int blockDimX; /**< X dimension of each thread block */
- unsigned int blockDimY; /**< Y dimension of each thread block */
- unsigned int blockDimZ; /**< Z dimension of each thread block */
- unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
- void **kernelParams; /**< Array of pointers to kernel parameters */
- void **extra; /**< Extra options */
- CUkernel kern; /**< Kernel to launch, will only be referenced if func is NULL */
- CUcontext ctx; /**< Context for the kernel task to run in. The value NULL will indicate the current context should be used by the api. This field is ignored if func is set. */
- } CUDA_KERNEL_NODE_PARAMS_v3;
- /**
- * Memset node parameters (version 1)
- *
- * Describes a memset of \c height rows of \c width elements each, where
- * every element is \c elementSize bytes. The unversioned name
- * ::CUDA_MEMSET_NODE_PARAMS is an alias for this version.
- */
- typedef struct CUDA_MEMSET_NODE_PARAMS_st {
- CUdeviceptr dst; /**< Destination device pointer */
- size_t pitch; /**< Pitch of destination device pointer. Unused if height is 1 */
- unsigned int value; /**< Value to be set */
- unsigned int elementSize; /**< Size of each element in bytes. Must be 1, 2, or 4. */
- size_t width; /**< Width of the row in elements */
- size_t height; /**< Number of rows */
- } CUDA_MEMSET_NODE_PARAMS_v1;
- typedef CUDA_MEMSET_NODE_PARAMS_v1 CUDA_MEMSET_NODE_PARAMS; /**< Unversioned name; alias for the version 1 layout */
- /**
- * Memset node parameters (version 2)
- *
- * Same leading members as ::CUDA_MEMSET_NODE_PARAMS_v1, followed by the
- * \c ctx member.
- */
- typedef struct CUDA_MEMSET_NODE_PARAMS_v2_st {
- CUdeviceptr dst; /**< Destination device pointer */
- size_t pitch; /**< Pitch of destination device pointer. Unused if height is 1 */
- unsigned int value; /**< Value to be set */
- unsigned int elementSize; /**< Size of each element in bytes. Must be 1, 2, or 4. */
- size_t width; /**< Width of the row in elements */
- size_t height; /**< Number of rows */
- CUcontext ctx; /**< Context on which to run the node */
- } CUDA_MEMSET_NODE_PARAMS_v2;
- /**
- * Host node parameters (version 1)
- *
- * Pairs a ::CUhostFn callback with the argument passed to it. The
- * unversioned name ::CUDA_HOST_NODE_PARAMS is an alias for this version.
- */
- typedef struct CUDA_HOST_NODE_PARAMS_st {
- CUhostFn fn; /**< The function to call when the node executes */
- void* userData; /**< Argument to pass to the function */
- } CUDA_HOST_NODE_PARAMS_v1;
- typedef CUDA_HOST_NODE_PARAMS_v1 CUDA_HOST_NODE_PARAMS; /**< Unversioned name; alias for the version 1 layout */
- /**
- * Host node parameters (version 2)
- *
- * The member list declared here is the same as that of
- * ::CUDA_HOST_NODE_PARAMS_v1 (same names, types and order).
- */
- typedef struct CUDA_HOST_NODE_PARAMS_v2_st {
- CUhostFn fn; /**< The function to call when the node executes */
- void* userData; /**< Argument to pass to the function */
- } CUDA_HOST_NODE_PARAMS_v2;
- /**
- * Conditional node handle flags
- *
- * NOTE(review): presumably passed when creating a handle with
- * ::cuGraphConditionalHandleCreate - confirm against that function.
- */
- #define CU_GRAPH_COND_ASSIGN_DEFAULT 0x1 /**< Default value is applied when graph is launched. */
- /**
- * Conditional node types
- *
- * Used as the \c type member of ::CUDA_CONDITIONAL_NODE_PARAMS.
- */
- typedef enum CUgraphConditionalNodeType_enum {
- CU_GRAPH_COND_TYPE_IF = 0, /**< Conditional 'if/else' Node. Body[0] executed if condition is non-zero. If \p size == 2, an optional ELSE graph is created and this is executed if the condition is zero. */
- CU_GRAPH_COND_TYPE_WHILE = 1, /**< Conditional 'while' Node. Body executed repeatedly while condition value is non-zero. */
- CU_GRAPH_COND_TYPE_SWITCH = 2, /**< Conditional 'switch' Node. Body[n] is executed once, where 'n' is the value of the condition. If the condition does not match a body index, no body is launched. */
- } CUgraphConditionalNodeType;
- /**
- * Conditional node parameters
- *
- * Describes a conditional graph node: the handle that supplies the
- * condition, the node type, the number of body graphs, the CUDA-owned array
- * that receives those graphs, and the context the node runs on.
- */
- typedef struct CUDA_CONDITIONAL_NODE_PARAMS {
- CUgraphConditionalHandle handle; /**< Conditional node handle.
- Handles must be created in advance of creating the node
- using ::cuGraphConditionalHandleCreate. */
- CUgraphConditionalNodeType type; /**< Type of conditional node. */
- unsigned int size; /**< Size of graph output array. Allowed values are 1 for CU_GRAPH_COND_TYPE_WHILE, 1 or 2
- for CU_GRAPH_COND_TYPE_IF, or any value greater than zero for CU_GRAPH_COND_TYPE_SWITCH. */
- CUgraph *phGraph_out; /**< CUDA-owned array populated with conditional node child graphs during creation of the node.
- Valid for the lifetime of the conditional node.
- The contents of the graph(s) are subject to the following constraints:
- - Allowed node types are kernel nodes, empty nodes, child graphs, memsets,
- memcopies, and conditionals. This applies recursively to child graphs and conditional bodies.
- - All kernels, including kernels in nested conditionals or child graphs at any level,
- must belong to the same CUDA context.
- These graphs may be populated using graph node creation APIs or ::cuStreamBeginCaptureToGraph.
- CU_GRAPH_COND_TYPE_IF:
- phGraph_out[0] is executed when the condition is non-zero. If \p size == 2, phGraph_out[1] will
- be executed when the condition is zero.
- CU_GRAPH_COND_TYPE_WHILE:
- phGraph_out[0] is executed as long as the condition is non-zero.
- CU_GRAPH_COND_TYPE_SWITCH:
- phGraph_out[n] is executed when the condition is equal to n. If the condition >= \p size,
- no body graph is executed.
- */
- CUcontext ctx; /**< Context on which to run the node. Must match context used to create the handle and all body nodes. */
- } CUDA_CONDITIONAL_NODE_PARAMS;
- /**
- * Graph node types
- *
- * Identifies the kind of operation a graph node represents (values 0 to 13).
- */
- typedef enum CUgraphNodeType_enum {
- CU_GRAPH_NODE_TYPE_KERNEL = 0, /**< GPU kernel node */
- CU_GRAPH_NODE_TYPE_MEMCPY = 1, /**< Memcpy node */
- CU_GRAPH_NODE_TYPE_MEMSET = 2, /**< Memset node */
- CU_GRAPH_NODE_TYPE_HOST = 3, /**< Host (executable) node */
- CU_GRAPH_NODE_TYPE_GRAPH = 4, /**< Node which executes an embedded graph */
- CU_GRAPH_NODE_TYPE_EMPTY = 5, /**< Empty (no-op) node */
- CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6, /**< External event wait node */
- CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7, /**< External event record node */
- CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8, /**< External semaphore signal node */
- CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9, /**< External semaphore wait node */
- CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10,/**< Memory allocation node */
- CU_GRAPH_NODE_TYPE_MEM_FREE = 11,/**< Memory free node */
- CU_GRAPH_NODE_TYPE_BATCH_MEM_OP = 12,/**< Batch MemOp node */
- CU_GRAPH_NODE_TYPE_CONDITIONAL = 13 /**< Conditional node.
- May be used to implement a conditional execution path or loop
- inside of a graph. The graph(s) contained within the body of the conditional node
- can be selectively executed or iterated upon based on the value of a conditional
- variable.
- Handles must be created in advance of creating the node
- using ::cuGraphConditionalHandleCreate.
- The following restrictions apply to graphs which contain conditional nodes:
- The graph cannot be used in a child node.
- Only one instantiation of the graph may exist at any point in time.
- The graph cannot be cloned.
- To set the control value, supply a default value when creating the handle and/or
- call ::cudaGraphSetConditional from device code.*/
- } CUgraphNodeType;
- /**
- * Type annotations that can be applied to graph edges as part of ::CUgraphEdgeData.
- *
- * The chosen value is stored in the \c type member of ::CUgraphEdgeData.
- */
- typedef enum CUgraphDependencyType_enum {
- CU_GRAPH_DEPENDENCY_TYPE_DEFAULT = 0, /**< This is an ordinary dependency. */
- CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC = 1 /**< This dependency type allows the downstream node to
- use \c cudaGridDependencySynchronize(). It may only be used
- between kernel nodes, and must be used with either the
- ::CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC or
- ::CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER outgoing port. */
- } CUgraphDependencyType;
- /**
- * This port activates when the kernel has finished executing.
- * It is one of the output port values a kernel node may use in
- * ::CUgraphEdgeData::from_port.
- */
- #define CU_GRAPH_KERNEL_NODE_PORT_DEFAULT 0
- /**
- * This port activates when all blocks of the kernel have performed cudaTriggerProgrammaticLaunchCompletion()
- * or have terminated. It must be used with edge type ::CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC. See also
- * ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT.
- */
- #define CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC 1
- /**
- * This port activates when all blocks of the kernel have begun execution. See also
- * ::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT.
- */
- #define CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER 2
- /**
- * Optional annotation for edges in a CUDA graph. Note, all edges implicitly have annotations and
- * default to a zero-initialized value if not specified. A zero-initialized struct indicates a
- * standard full serialization of two nodes with memory visibility.
- *
- * All members are unsigned char; the three named fields plus the five
- * reserved bytes add up to eight bytes of declared members.
- */
- typedef struct CUgraphEdgeData_st {
- unsigned char from_port; /**< This indicates when the dependency is triggered from the upstream
- node on the edge. The meaning is specific to the node type. A value
- of 0 in all cases means full completion of the upstream node, with
- memory visibility to the downstream node or portion thereof
- (indicated by \c to_port).
- <br>
- Only kernel nodes define non-zero ports. A kernel node
- can use the following output port types:
- ::CU_GRAPH_KERNEL_NODE_PORT_DEFAULT, ::CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC,
- or ::CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER. */
- unsigned char to_port; /**< This indicates what portion of the downstream node is dependent on
- the upstream node or portion thereof (indicated by \c from_port). The
- meaning is specific to the node type. A value of 0 in all cases means
- the entirety of the downstream node is dependent on the upstream work.
- <br>
- Currently no node types define non-zero ports. Accordingly, this field
- must be set to zero. */
- unsigned char type; /**< This should be populated with a value from ::CUgraphDependencyType. (It
- is typed as char due to compiler-specific layout of bitfields.) See
- ::CUgraphDependencyType. */
- unsigned char reserved[5]; /**< These bytes are unused and must be zeroed. This ensures
- compatibility if additional fields are added in the future. */
- } CUgraphEdgeData;
- /**
- * Graph instantiation results
- *
- * Reported through the \c result_out member of
- * ::CUDA_GRAPH_INSTANTIATE_PARAMS.
- */
- typedef enum CUgraphInstantiateResult_enum
- {
- CUDA_GRAPH_INSTANTIATE_SUCCESS = 0, /**< Instantiation succeeded */
- CUDA_GRAPH_INSTANTIATE_ERROR = 1, /**< Instantiation failed for an unexpected reason which is described in the return value of the function */
- CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE = 2, /**< Instantiation failed due to invalid structure, such as cycles */
- CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED = 3, /**< Instantiation for device launch failed because the graph contained an unsupported operation */
- CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED = 4, /**< Instantiation for device launch failed due to the nodes belonging to different contexts */
- CUDA_GRAPH_INSTANTIATE_CONDITIONAL_HANDLE_UNUSED = 5, /**< One or more conditional handles are not associated with conditional nodes */
- } CUgraphInstantiateResult;
- /**
- * Graph instantiation parameters
- *
- * Bundles the instantiation flags and upload stream together with the
- * members that report the failing node (if any) and the instantiation
- * result.
- */
- typedef struct CUDA_GRAPH_INSTANTIATE_PARAMS_st
- {
- cuuint64_t flags; /**< Instantiation flags */
- CUstream hUploadStream; /**< Upload stream */
- CUgraphNode hErrNode_out; /**< The node which caused instantiation to fail, if any */
- CUgraphInstantiateResult result_out; /**< Whether instantiation was successful. If it failed, the reason why (see ::CUgraphInstantiateResult) */
- } CUDA_GRAPH_INSTANTIATE_PARAMS;
- typedef enum CUsynchronizationPolicy_enum { /* Synchronization policy values. Numbering starts at 1, so 0 is not a named policy. NOTE(review): presumably the type behind ::CUlaunchAttributeValue::syncPolicy - confirm. */
- CU_SYNC_POLICY_AUTO = 1,
- CU_SYNC_POLICY_SPIN = 2,
- CU_SYNC_POLICY_YIELD = 3,
- CU_SYNC_POLICY_BLOCKING_SYNC = 4
- } CUsynchronizationPolicy;
- /**
- * Cluster scheduling policies. These may be passed to ::cuFuncSetAttribute or ::cuKernelSetAttribute.
- * See also ::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE.
- */
- typedef enum CUclusterSchedulingPolicy_enum {
- CU_CLUSTER_SCHEDULING_POLICY_DEFAULT = 0, /**< The default policy */
- CU_CLUSTER_SCHEDULING_POLICY_SPREAD = 1, /**< Spread the blocks within a cluster to the SMs */
- CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING = 2 /**< Allow the hardware to load-balance the blocks in a cluster to the SMs */
- } CUclusterSchedulingPolicy;
- /**
- * Memory Synchronization Domain
- *
- * A kernel can be launched in a specified memory synchronization domain that affects all memory operations issued by
- * that kernel. A memory barrier issued in one domain will only order memory operations in that domain, thus eliminating
- * latency increase from memory barriers ordering unrelated traffic.
- *
- * By default, kernels are launched in domain 0. Kernels launched with ::CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a
- * different domain ID. The user may also alter the domain ID with ::CUlaunchMemSyncDomainMap for a specific stream /
- * graph node / kernel launch. See ::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN, ::cuStreamSetAttribute, ::cuLaunchKernelEx,
- * ::cuGraphKernelNodeSetAttribute.
- *
- * Memory operations done in kernels launched in different domains are considered system-scope distanced. In other
- * words, a GPU-scoped memory synchronization is not sufficient for memory order to be observed by kernels in another
- * memory synchronization domain even if they are on the same GPU.
- */
- typedef enum CUlaunchMemSyncDomain_enum {
- CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT = 0, /**< Launch kernels in the default domain */
- CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE = 1 /**< Launch kernels in the remote domain */
- } CUlaunchMemSyncDomain;
- /**
- * Memory Synchronization Domain map
- *
- * See ::CUlaunchMemSyncDomain.
- *
- * By default, kernels are launched in domain 0. Kernels launched with ::CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a
- * different domain ID. The user may also alter the domain ID with ::CUlaunchMemSyncDomainMap for a specific stream /
- * graph node / kernel launch. See ::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
- *
- * Domain ID range is available through ::CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT.
- */
- typedef struct CUlaunchMemSyncDomainMap_st {
- unsigned char default_; /**< The default domain ID to use for designated kernels */
- unsigned char remote; /**< The remote domain ID to use for designated kernels */
- } CUlaunchMemSyncDomainMap;
- /**
- * Launch attributes enum; used as id field of ::CUlaunchAttribute
- */
- typedef enum CUlaunchAttributeID_enum {
- CU_LAUNCH_ATTRIBUTE_IGNORE = 0 /**< Ignored entry, for convenient composition */
- , CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1 /**< Valid for streams, graph nodes, launches. See
- ::CUlaunchAttributeValue::accessPolicyWindow. */
- , CU_LAUNCH_ATTRIBUTE_COOPERATIVE = 2 /**< Valid for graph nodes, launches. See
- ::CUlaunchAttributeValue::cooperative. */
- , CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3 /**< Valid for streams. See
- ::CUlaunchAttributeValue::syncPolicy. */
- , CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = 4 /**< Valid for graph nodes, launches. See ::CUlaunchAttributeValue::clusterDim. */
- , CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5 /**< Valid for graph nodes, launches. See ::CUlaunchAttributeValue::clusterSchedulingPolicyPreference. */
- , CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = 6 /**< Valid for launches. Setting
- ::CUlaunchAttributeValue::programmaticStreamSerializationAllowed
- to non-0 signals that the kernel will use programmatic
- means to resolve its stream dependency, so that the
- CUDA runtime should opportunistically allow the grid's
- execution to overlap with the previous kernel in the
- stream, if that kernel requests the overlap. The
- dependent launches can choose to wait on the
- dependency using the programmatic sync
- (cudaGridDependencySynchronize() or equivalent PTX
- instructions). */
- , CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = 7 /**< Valid for launches. Set
- ::CUlaunchAttributeValue::programmaticEvent to
- record the event. Event recorded through this
- launch attribute is guaranteed to only trigger
- after all blocks in the associated kernel trigger
- the event. A block can trigger the event through
- PTX launchdep.release or CUDA builtin function
- cudaTriggerProgrammaticLaunchCompletion(). A
- trigger can also be inserted at the beginning of
- each block's execution if triggerAtBlockStart is
- set to non-0. The dependent launches can choose to
- wait on the dependency using the programmatic sync
- (cudaGridDependencySynchronize() or equivalent PTX
- instructions). Note that dependents (including the
- CPU thread calling cuEventSynchronize()) are not
- guaranteed to observe the release precisely when
- it is released. For example, cuEventSynchronize()
- may only observe the event trigger long after the
- associated kernel has completed. This recording
- type is primarily meant for establishing
- programmatic dependency between device tasks. Note
- also this type of dependency allows, but does not
- guarantee, concurrent execution of tasks.
- <br>
- The event supplied must not be an interprocess or
- interop event. The event must disable timing (i.e.
- must be created with the ::CU_EVENT_DISABLE_TIMING
- flag set).
- */
- , CU_LAUNCH_ATTRIBUTE_PRIORITY = 8 /**< Valid for streams, graph nodes, launches. See
- ::CUlaunchAttributeValue::priority. */
- , CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9 /**< Valid for streams, graph nodes, launches. See
- ::CUlaunchAttributeValue::memSyncDomainMap. */
- , CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10 /**< Valid for streams, graph nodes, launches. See
- ::CUlaunchAttributeValue::memSyncDomain. */
- , CU_LAUNCH_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION = 11 /**< Valid for graph nodes, launches. Set
- ::CUlaunchAttributeValue::preferredClusterDim
- to allow the kernel launch to specify a preferred substitute
- cluster dimension. Blocks may be grouped according to either
- the dimensions specified with this attribute (grouped into a
- "preferred substitute cluster"), or the one specified with
- ::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION attribute (grouped
- into a "regular cluster"). The cluster dimensions of a
- "preferred substitute cluster" shall be an integer multiple
- greater than zero of the regular cluster dimensions. The
- device will attempt - on a best-effort basis - to group
- thread blocks into preferred clusters over grouping them
- into regular clusters. When it deems necessary (primarily
- when the device temporarily runs out of physical resources
- to launch the larger preferred clusters), the device may
- switch to launch the regular clusters instead to attempt to
- utilize as much of the physical device resources as possible.
- <br>
- Each type of cluster will have its enumeration / coordinate
- setup as if the grid consists solely of its type of cluster.
- For example, if the preferred substitute cluster dimensions
- double the regular cluster dimensions, there might be
- simultaneously a regular cluster indexed at (1,0,0), and a
- preferred cluster indexed at (1,0,0). In this example, the
- preferred substitute cluster (1,0,0) replaces regular
- clusters (2,0,0) and (3,0,0) and groups their blocks.
- <br>
- This attribute will only take effect when a regular cluster
- dimension has been specified. The preferred substitute
- cluster dimension must be an integer multiple greater than
- zero of the regular cluster dimension and must divide the
- grid. It must also be no more than `maxBlocksPerCluster`, if
- it is set in the kernel's `__launch_bounds__`. Otherwise it
- must be less than the maximum value the driver can support.
- Otherwise, setting this attribute to a value physically
- unable to fit on any particular device is permitted. */
- , CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12 /**< Valid for launches. Set
- ::CUlaunchAttributeValue::launchCompletionEvent to record the
- event.
- <br>
- Nominally, the event is triggered once all blocks of the kernel
- have begun execution. Currently this is a best effort. If a kernel
- B has a launch completion dependency on a kernel A, B may wait
- until A is complete. Alternatively, blocks of B may begin before
- all blocks of A have begun, for example if B can claim execution
- resources unavailable to A (e.g. they run on different GPUs) or
- if B is a higher priority than A.
- Exercise caution if such an ordering inversion could lead
- to deadlock.
- <br>
- A launch completion event is nominally similar to a programmatic
- event with \c triggerAtBlockStart set except that it is not
- visible to \c cudaGridDependencySynchronize() and can be used with
- compute capability less than 9.0.
- <br>
- The event supplied must not be an interprocess or interop
- event. The event must disable timing (i.e. must be created
- with the ::CU_EVENT_DISABLE_TIMING flag set). */
- , CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13 /**< Valid for graph nodes, launches. This attribute is graphs-only,
- and passing it to a launch in a non-capturing stream will result
- in an error.
- <br>
- ::CUlaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable can
- only be set to 0 or 1. Setting the field to 1 indicates that the
- corresponding kernel node should be device-updatable. On success, a handle
- will be returned via
- ::CUlaunchAttributeValue::deviceUpdatableKernelNode::devNode which can be
- passed to the various device-side update functions to update the node's
- kernel parameters from within another kernel. For more information on the
- types of device updates that can be made, as well as the relevant limitations
- thereof, see ::cudaGraphKernelNodeUpdatesApply.
- <br>
- Nodes which are device-updatable have additional restrictions compared to
- regular kernel nodes. Firstly, device-updatable nodes cannot be removed
- from their graph via ::cuGraphDestroyNode. Additionally, once opted-in
- to this functionality, a node cannot opt out, and any attempt to set the
- deviceUpdatable attribute to 0 will result in an error. Device-updatable
- kernel nodes also cannot have their attributes copied to/from another kernel
- node via ::cuGraphKernelNodeCopyAttributes. Graphs containing one or more
- device-updatable nodes also do not allow multiple instantiation, and neither
- the graph nor its instantiated version can be passed to ::cuGraphExecUpdate.
- <br>
- If a graph contains device-updatable nodes and updates those nodes from the device
- from within the graph, the graph must be uploaded with ::cuGraphUpload before it
- is launched. For such a graph, if host-side executable graph updates are made to the
- device-updatable nodes, the graph must be uploaded before it is launched again. */
- , CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14 /**< Valid for launches. On devices where the L1 cache and shared memory use the
- same hardware resources, setting ::CUlaunchAttributeValue::sharedMemCarveout to a
- percentage between 0-100 signals the CUDA driver to set the shared memory carveout
- preference, in percent of the total shared memory for that kernel launch.
- This attribute takes precedence over ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
- This is only a hint, and the CUDA driver can choose a different configuration if
- required for the launch. */
- #if defined(__CUDA_API_VERSION_INTERNAL) && !defined(__CUDA_API_VERSION_INTERNAL_ODR)
- , CU_LAUNCH_ATTRIBUTE_MAX
- #endif
- } CUlaunchAttributeID;
- /**
- * Launch attributes union; used as value field of ::CUlaunchAttribute.
- * All members overlay the same storage, and \p pad keeps the union at
- * least 64 bytes in size. Each member documents the ::CUlaunchAttributeID
- * whose value it carries.
- */
- typedef union CUlaunchAttributeValue_union {
- char pad[64]; /* Pad to 64 bytes */
- CUaccessPolicyWindow accessPolicyWindow; /**< Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW. */
- int cooperative; /**< Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero indicates a cooperative
- kernel (see ::cuLaunchCooperativeKernel). */
- CUsynchronizationPolicy syncPolicy; /**< Value of launch attribute
- ::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY. ::CUsynchronizationPolicy for
- work queued up in this stream. */
- /**
- * Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION that
- * represents the desired cluster dimensions for the kernel. Anonymous
- * struct with the following fields:
- * - \p x - The X dimension of the cluster, in blocks. Must be a divisor
- * of the grid X dimension.
- * - \p y - The Y dimension of the cluster, in blocks. Must be a divisor
- * of the grid Y dimension.
- * - \p z - The Z dimension of the cluster, in blocks. Must be a divisor
- * of the grid Z dimension.
- */
- struct {
- unsigned int x;
- unsigned int y;
- unsigned int z;
- } clusterDim;
- CUclusterSchedulingPolicy clusterSchedulingPolicyPreference; /**< Value of launch attribute
- ::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
- scheduling policy preference for the kernel. */
- int programmaticStreamSerializationAllowed; /**< Value of launch attribute
- ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION. */
- /**
- * Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
- * with the following fields:
- * - \p CUevent event - Event to fire when all blocks trigger it.
- * - \p int flags - Event record flags, see ::cuEventRecordWithFlags. Does not accept ::CU_EVENT_RECORD_EXTERNAL.
- * - \p int triggerAtBlockStart - If this is set to non-0, each block launch will automatically trigger the event.
- */
- struct {
- CUevent event;
- int flags;
- int triggerAtBlockStart;
- } programmaticEvent;
- /**
- * Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT
- * with the following fields:
- * - \p CUevent event - Event to fire when the last block launches.
- * - \p int flags - Event record flags, see ::cuEventRecordWithFlags. Does not accept ::CU_EVENT_RECORD_EXTERNAL.
- */
- struct {
- CUevent event;
- int flags;
- } launchCompletionEvent;
- int priority; /**< Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution priority of the kernel. */
- CUlaunchMemSyncDomainMap memSyncDomainMap; /**< Value of launch attribute
- ::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP. See
- ::CUlaunchMemSyncDomainMap. */
- CUlaunchMemSyncDomain memSyncDomain; /**< Value of launch attribute
- ::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN. See ::CUlaunchMemSyncDomain. */
- /**
- * Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION
- * that represents the desired preferred cluster dimensions for the kernel.
- * Anonymous struct with the following fields:
- * - \p x - The X dimension of the preferred cluster, in blocks. Must
- * be a divisor of the grid X dimension, and must be a
- * multiple of the \p x field of ::CUlaunchAttributeValue::clusterDim.
- * - \p y - The Y dimension of the preferred cluster, in blocks. Must
- * be a divisor of the grid Y dimension, and must be a
- * multiple of the \p y field of ::CUlaunchAttributeValue::clusterDim.
- * - \p z - The Z dimension of the preferred cluster, in blocks. Must be
- * equal to the \p z field of ::CUlaunchAttributeValue::clusterDim.
- */
- struct {
- unsigned int x;
- unsigned int y;
- unsigned int z;
- } preferredClusterDim;
- /**
- * Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE
- * with the following fields:
- * - \p int deviceUpdatable - Whether or not the resulting kernel node should be device-updatable.
- * - \p CUgraphDeviceNode devNode - Returns a handle to pass to the various device-side update functions.
- */
- struct {
- int deviceUpdatable;
- CUgraphDeviceNode devNode;
- } deviceUpdatableKernelNode;
- unsigned int sharedMemCarveout; /**< Value of launch attribute ::CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. */
- } CUlaunchAttributeValue;
- /**
- * Launch attribute: an attribute ID paired with the value to apply for it.
- */
- typedef struct CUlaunchAttribute_st {
- CUlaunchAttributeID id; /**< Attribute to set */
- char pad[8 - sizeof(CUlaunchAttributeID)]; /**< Explicit padding sized so that \p id and \p pad together span 8 bytes */
- CUlaunchAttributeValue value; /**< Value of the attribute */
- } CUlaunchAttribute;
- /**
- * CUDA extensible launch configuration: grid and block dimensions,
- * dynamic shared-memory size, target stream, and an optional array of
- * ::CUlaunchAttribute entries.
- */
- typedef struct CUlaunchConfig_st {
- unsigned int gridDimX; /**< Width of grid in blocks */
- unsigned int gridDimY; /**< Height of grid in blocks */
- unsigned int gridDimZ; /**< Depth of grid in blocks */
- unsigned int blockDimX; /**< X dimension of each thread block */
- unsigned int blockDimY; /**< Y dimension of each thread block */
- unsigned int blockDimZ; /**< Z dimension of each thread block */
- unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
- CUstream hStream; /**< Stream identifier */
- CUlaunchAttribute *attrs; /**< List of attributes; nullable if ::CUlaunchConfig::numAttrs == 0 */
- unsigned int numAttrs; /**< Number of attributes populated in ::CUlaunchConfig::attrs */
- } CUlaunchConfig;
- typedef CUlaunchAttributeID CUkernelNodeAttrID; /**< Kernel node attribute IDs are an alias of ::CUlaunchAttributeID; the CU_KERNEL_NODE_ATTRIBUTE_* macros below map onto the matching CU_LAUNCH_ATTRIBUTE_* values */
- #define CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW
- #define CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE CU_LAUNCH_ATTRIBUTE_COOPERATIVE
- #define CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
- #define CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
- #define CU_KERNEL_NODE_ATTRIBUTE_PRIORITY CU_LAUNCH_ATTRIBUTE_PRIORITY
- #define CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
- #define CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN
- #define CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION CU_LAUNCH_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION
- #define CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE
- #define CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
- typedef CUlaunchAttributeValue CUkernelNodeAttrValue_v1; /**< Kernel node attribute values are an alias of ::CUlaunchAttributeValue */
- typedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue; /**< Unversioned name for ::CUkernelNodeAttrValue_v1 */
- /**
- * Possible stream capture statuses returned by ::cuStreamIsCapturing.
- * The three states are: not capturing, actively capturing, and part of a
- * capture sequence that was invalidated but not yet terminated.
- */
- typedef enum CUstreamCaptureStatus_enum {
- CU_STREAM_CAPTURE_STATUS_NONE = 0, /**< Stream is not capturing */
- CU_STREAM_CAPTURE_STATUS_ACTIVE = 1, /**< Stream is actively capturing */
- CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2 /**< Stream is part of a capture sequence that
- has been invalidated, but not terminated */
- } CUstreamCaptureStatus;
- /**
- * Possible modes for stream capture thread interactions. The meaning of
- * each mode is not described in this header; for details see
- * ::cuStreamBeginCapture and ::cuThreadExchangeStreamCaptureMode.
- */
- typedef enum CUstreamCaptureMode_enum {
- CU_STREAM_CAPTURE_MODE_GLOBAL = 0,
- CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1,
- CU_STREAM_CAPTURE_MODE_RELAXED = 2
- } CUstreamCaptureMode;
- typedef CUlaunchAttributeID CUstreamAttrID; /**< Stream attribute IDs are an alias of ::CUlaunchAttributeID; the CU_STREAM_ATTRIBUTE_* macros below map onto the matching CU_LAUNCH_ATTRIBUTE_* values */
- #define CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW
- #define CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY
- #define CU_STREAM_ATTRIBUTE_PRIORITY CU_LAUNCH_ATTRIBUTE_PRIORITY
- #define CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
- #define CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN
- typedef CUlaunchAttributeValue CUstreamAttrValue_v1; /**< Stream attribute values are an alias of ::CUlaunchAttributeValue */
- typedef CUstreamAttrValue_v1 CUstreamAttrValue; /**< Unversioned name for ::CUstreamAttrValue_v1 */
- /**
- * Flags to specify search options. For more details see ::cuGetProcAddress.
- * The two non-default flags occupy distinct bits (bit 0 and bit 1).
- */
- typedef enum CUdriverProcAddress_flags_enum {
- CU_GET_PROC_ADDRESS_DEFAULT = 0, /**< Default search mode for driver symbols. */
- CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1 << 0, /**< Search for legacy versions of driver symbols. */
- CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 1 << 1 /**< Search for per-thread versions of driver symbols. */
- } CUdriverProcAddress_flags;
- /**
- * Flags to indicate search status. For more details see ::cuGetProcAddress
- */
- typedef enum CUdriverProcAddressQueryResult_enum {
- CU_GET_PROC_ADDRESS_SUCCESS = 0, /**< Symbol was successfully found */
- CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1, /**< Symbol was not found in search */
- CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2 /**< Symbol was found but version supplied was not sufficient */
- } CUdriverProcAddressQueryResult;
- /**
- * Execution Affinity Types. Used as the \p type discriminator of
- * ::CUexecAffinityParam_v1.
- */
- typedef enum CUexecAffinityType_enum {
- CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0, /**< Create a context with limited SMs. */
- CU_EXEC_AFFINITY_TYPE_MAX /**< Last enumerator (implicit value 1). NOTE(review): presumably a count/sentinel rather than a selectable type - confirm */
- } CUexecAffinityType;
- /**
- * Value for ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: a single SM count.
- */
- typedef struct CUexecAffinitySmCount_st {
- unsigned int val; /**< The number of SMs the context is limited to use. */
- } CUexecAffinitySmCount_v1;
- typedef CUexecAffinitySmCount_v1 CUexecAffinitySmCount; /**< Unversioned name for ::CUexecAffinitySmCount_v1 */
- /**
- * Execution Affinity Parameters: an affinity type together with the
- * value for that type.
- */
- typedef struct CUexecAffinityParam_st {
- CUexecAffinityType type; /**< Type of execution affinity described by \p param */
- union {
- CUexecAffinitySmCount smCount; /**< Value for ::CU_EXEC_AFFINITY_TYPE_SM_COUNT */
- } param;
- } CUexecAffinityParam_v1;
- /**
- * Execution Affinity Parameters (unversioned name for ::CUexecAffinityParam_v1)
- */
- typedef CUexecAffinityParam_v1 CUexecAffinityParam;
- typedef enum CUcigDataType_enum {
- CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 0x1, /**< D3D12 Command Queue Handle */
- } CUcigDataType; /**< Type of the ::CUctxCigParam::sharedDataType field */
- /**
- * CIG Context Create Params: a data-type tag plus an untyped pointer.
- */
- typedef struct CUctxCigParam_st {
- CUcigDataType sharedDataType; /**< Kind of data referenced by \p sharedData */
- void* sharedData; /**< NOTE(review): presumably the handle named by \p sharedDataType (e.g. a D3D12 command queue) - confirm */
- } CUctxCigParam;
- /**
- * Params for creating CUDA context.
- * Exactly one of execAffinityParams and cigParams
- * must be non-NULL.
- */
- typedef struct CUctxCreateParams_st {
- CUexecAffinityParam *execAffinityParams; /**< Execution affinity parameters; mutually exclusive with \p cigParams */
- int numExecAffinityParams; /**< NOTE(review): presumably the number of entries in \p execAffinityParams - confirm */
- CUctxCigParam *cigParams; /**< CIG parameters; mutually exclusive with \p execAffinityParams */
- } CUctxCreateParams;
- /**
- * Library options to be specified with ::cuLibraryLoadData() or ::cuLibraryLoadFromFile()
- */
- typedef enum CUlibraryOption_enum
- {
- CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0, /**< NOTE(review): undocumented here; presumably paired with ::CUlibraryHostUniversalFunctionAndDataTable - confirm */
- /**
- * Specifies that the argument \p code passed to ::cuLibraryLoadData() will be preserved.
- * Specifying this option will let the driver know that \p code can be accessed at any point
- * until ::cuLibraryUnload(). The default behavior is for the driver to allocate and
- * maintain its own copy of \p code. Note that this is only a memory usage optimization
- * hint and the driver can choose to ignore it if required.
- * Specifying this option with ::cuLibraryLoadFromFile() is invalid and
- * will return ::CUDA_ERROR_INVALID_VALUE.
- */
- CU_LIBRARY_BINARY_IS_PRESERVED = 1,
- CU_LIBRARY_NUM_OPTIONS /**< Last enumerator (implicit value 2). NOTE(review): presumably the number of options - confirm */
- } CUlibraryOption;
- typedef struct CUlibraryHostUniversalFunctionAndDataTable_st
- {
- void *functionTable; /**< Untyped pointer to a function table; layout is not described in this header */
- size_t functionWindowSize; /**< NOTE(review): presumably a size associated with \p functionTable; units not stated here - confirm */
- void *dataTable; /**< Untyped pointer to a data table; layout is not described in this header */
- size_t dataWindowSize; /**< NOTE(review): presumably a size associated with \p dataTable; units not stated here - confirm */
- } CUlibraryHostUniversalFunctionAndDataTable;
- /**
- * Error codes
- */
- typedef enum cudaError_enum {
- /**
- * The API call returned with no errors. In the case of query calls, this
- * also means that the operation being queried is complete (see
- * ::cuEventQuery() and ::cuStreamQuery()).
- */
- CUDA_SUCCESS = 0,
- /**
- * This indicates that one or more of the parameters passed to the API call
- * is not within an acceptable range of values.
- */
- CUDA_ERROR_INVALID_VALUE = 1,
- /**
- * The API call failed because it was unable to allocate enough memory or
- * other resources to perform the requested operation.
- */
- CUDA_ERROR_OUT_OF_MEMORY = 2,
- /**
- * This indicates that the CUDA driver has not been initialized with
- * ::cuInit() or that initialization has failed.
- */
- CUDA_ERROR_NOT_INITIALIZED = 3,
- /**
- * This indicates that the CUDA driver is in the process of shutting down.
- */
- CUDA_ERROR_DEINITIALIZED = 4,
- /**
- * This indicates that the profiler is not initialized for this run. This can
- * happen when the application is running with external profiling tools
- * like visual profiler.
- */
- CUDA_ERROR_PROFILER_DISABLED = 5,
- /**
- * \deprecated
- * This error return is deprecated as of CUDA 5.0. It is no longer an error
- * to attempt to enable/disable the profiling via ::cuProfilerStart or
- * ::cuProfilerStop without initialization.
- */
- CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
- /**
- * \deprecated
- * This error return is deprecated as of CUDA 5.0. It is no longer an error
- * to call cuProfilerStart() when profiling is already enabled.
- */
- CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
- /**
- * \deprecated
- * This error return is deprecated as of CUDA 5.0. It is no longer an error
- * to call cuProfilerStop() when profiling is already disabled.
- */
- CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
- /**
- * This indicates that the CUDA driver that the application has loaded is a
- * stub library. Applications that run with the stub rather than a real
- * driver loaded will result in CUDA API returning this error.
- */
- CUDA_ERROR_STUB_LIBRARY = 34,
- /**
- * This indicates that requested CUDA device is unavailable at the current
- * time. Devices are often unavailable due to use of
- * ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED.
- */
- CUDA_ERROR_DEVICE_UNAVAILABLE = 46,
- /**
- * This indicates that no CUDA-capable devices were detected by the installed
- * CUDA driver.
- */
- CUDA_ERROR_NO_DEVICE = 100,
- /**
- * This indicates that the device ordinal supplied by the user does not
- * correspond to a valid CUDA device or that the action requested is
- * invalid for the specified device.
- */
- CUDA_ERROR_INVALID_DEVICE = 101,
- /**
- * This error indicates that the Grid license is not applied.
- */
- CUDA_ERROR_DEVICE_NOT_LICENSED = 102,
- /**
- * This indicates that the device kernel image is invalid. This can also
- * indicate an invalid CUDA module.
- */
- CUDA_ERROR_INVALID_IMAGE = 200,
- /**
- * This most frequently indicates that there is no context bound to the
- * current thread. This can also be returned if the context passed to an
- * API call is not a valid handle (such as a context that has had
- * ::cuCtxDestroy() invoked on it). This can also be returned if a user
- * mixes different API versions (i.e. 3010 context with 3020 API calls).
- * See ::cuCtxGetApiVersion() for more details.
- * This can also be returned if the green context passed to an API call
- * was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API.
- */
- CUDA_ERROR_INVALID_CONTEXT = 201,
- /**
- * This indicates that the context being supplied as a parameter to the
- * API call was already the active context.
- * \deprecated
- * This error return is deprecated as of CUDA 3.2. It is no longer an
- * error to attempt to push the active context via ::cuCtxPushCurrent().
- */
- CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
- /**
- * This indicates that a map or register operation has failed.
- */
- CUDA_ERROR_MAP_FAILED = 205,
- /**
- * This indicates that an unmap or unregister operation has failed.
- */
- CUDA_ERROR_UNMAP_FAILED = 206,
- /**
- * This indicates that the specified array is currently mapped and thus
- * cannot be destroyed.
- */
- CUDA_ERROR_ARRAY_IS_MAPPED = 207,
- /**
- * This indicates that the resource is already mapped.
- */
- CUDA_ERROR_ALREADY_MAPPED = 208,
- /**
- * This indicates that there is no kernel image available that is suitable
- * for the device. This can occur when a user specifies code generation
- * options for a particular CUDA source file that do not include the
- * corresponding device configuration.
- */
- CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
- /**
- * This indicates that a resource has already been acquired.
- */
- CUDA_ERROR_ALREADY_ACQUIRED = 210,
- /**
- * This indicates that a resource is not mapped.
- */
- CUDA_ERROR_NOT_MAPPED = 211,
- /**
- * This indicates that a mapped resource is not available for access as an
- * array.
- */
- CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
- /**
- * This indicates that a mapped resource is not available for access as a
- * pointer.
- */
- CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
- /**
- * This indicates that an uncorrectable ECC error was detected during
- * execution.
- */
- CUDA_ERROR_ECC_UNCORRECTABLE = 214,
- /**
- * This indicates that the ::CUlimit passed to the API call is not
- * supported by the active device.
- */
- CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
- /**
- * This indicates that the ::CUcontext passed to the API call can
- * only be bound to a single CPU thread at a time but is already
- * bound to a CPU thread.
- */
- CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
- /**
- * This indicates that peer access is not supported across the given
- * devices.
- */
- CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
- /**
- * This indicates that a PTX JIT compilation failed.
- */
- CUDA_ERROR_INVALID_PTX = 218,
- /**
- * This indicates an error with OpenGL or DirectX context.
- */
- CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
- /**
- * This indicates that an uncorrectable NVLink error was detected during the
- * execution.
- */
- CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
- /**
- * This indicates that the PTX JIT compiler library was not found.
- */
- CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
- /**
- * This indicates that the provided PTX was compiled with an unsupported toolchain.
- */
- CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
- /**
- * This indicates that the PTX JIT compilation was disabled.
- */
- CUDA_ERROR_JIT_COMPILATION_DISABLED = 223,
- /**
- * This indicates that the ::CUexecAffinityType passed to the API call is not
- * supported by the active device.
- */
- CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224,
- /**
- * This indicates that the code to be compiled by the PTX JIT contains
- * an unsupported call to cudaDeviceSynchronize.
- */
- CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225,
- /**
- * This indicates that an exception occurred on the device that is now
- * contained by the GPU's error containment capability. Common causes are -
- * a. Certain types of invalid accesses of peer GPU memory over nvlink
- * b. Certain classes of hardware errors
- * This leaves the process in an inconsistent state and any further CUDA
- * work will return the same error. To continue using CUDA, the process must
- * be terminated and relaunched.
- */
- CUDA_ERROR_CONTAINED = 226,
- /**
- * This indicates that the device kernel source is invalid. This includes
- * compilation/linker errors encountered in device code or user error.
- */
- CUDA_ERROR_INVALID_SOURCE = 300,
- /**
- * This indicates that the file specified was not found.
- */
- CUDA_ERROR_FILE_NOT_FOUND = 301,
- /**
- * This indicates that a link to a shared object failed to resolve.
- */
- CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
- /**
- * This indicates that initialization of a shared object failed.
- */
- CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
- /**
- * This indicates that an OS call failed.
- */
- CUDA_ERROR_OPERATING_SYSTEM = 304,
- /**
- * This indicates that a resource handle passed to the API call was not
- * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
- */
- CUDA_ERROR_INVALID_HANDLE = 400,
- /**
- * This indicates that a resource required by the API call is not in a
- * valid state to perform the requested operation.
- */
- CUDA_ERROR_ILLEGAL_STATE = 401,
- /**
- * This indicates an attempt was made to introspect an object in a way that
- * would discard semantically important information. This is either due to
- * the object using functionality newer than the API version used to
- * introspect it or omission of optional return arguments.
- */
- CUDA_ERROR_LOSSY_QUERY = 402,
- /**
- * This indicates that a named symbol was not found. Examples of symbols
- * are global/constant variable names, driver function names, texture names,
- * and surface names.
- */
- CUDA_ERROR_NOT_FOUND = 500,
- /**
- * This indicates that asynchronous operations issued previously have not
- * completed yet. This result is not actually an error, but must be indicated
- * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
- * may return this value include ::cuEventQuery() and ::cuStreamQuery().
- */
- CUDA_ERROR_NOT_READY = 600,
- /**
- * While executing a kernel, the device encountered a
- * load or store instruction on an invalid memory address.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_ILLEGAL_ADDRESS = 700,
- /**
- * This indicates that a launch did not occur because it did not have
- * appropriate resources. This error usually indicates that the user has
- * attempted to pass too many arguments to the device kernel, or the
- * kernel launch specifies too many threads for the kernel's register
- * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
- * when a 32-bit int is expected) is equivalent to passing too many
- * arguments and can also result in this error.
- */
- CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
- /**
- * This indicates that the device kernel took too long to execute. This can
- * only occur if timeouts are enabled - see the device attribute
- * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_LAUNCH_TIMEOUT = 702,
- /**
- * This error indicates a kernel launch that uses an incompatible texturing
- * mode.
- */
- CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
- /**
- * This error indicates that a call to ::cuCtxEnablePeerAccess() is
- * trying to re-enable peer access to a context which has already
- * had peer access to it enabled.
- */
- CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
- /**
- * This error indicates that ::cuCtxDisablePeerAccess() is
- * trying to disable peer access which has not been enabled yet
- * via ::cuCtxEnablePeerAccess().
- */
- CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
- /**
- * This error indicates that the primary context for the specified device
- * has already been initialized.
- */
- CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
- /**
- * This error indicates that the context current to the calling thread
- * has been destroyed using ::cuCtxDestroy, or is a primary context which
- * has not yet been initialized.
- */
- CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
- /**
- * A device-side assert triggered during kernel execution. The context
- * cannot be used anymore, and must be destroyed. All existing device
- * memory allocations from this context are invalid and must be
- * reconstructed if the program is to continue using CUDA.
- */
- CUDA_ERROR_ASSERT = 710,
- /**
- * This error indicates that the hardware resources required to enable
- * peer access have been exhausted for one or more of the devices
- * passed to ::cuCtxEnablePeerAccess().
- */
- CUDA_ERROR_TOO_MANY_PEERS = 711,
- /**
- * This error indicates that the memory range passed to ::cuMemHostRegister()
- * has already been registered.
- */
- CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
- /**
- * This error indicates that the pointer passed to ::cuMemHostUnregister()
- * does not correspond to any currently registered memory region.
- */
- CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
- /**
- * While executing a kernel, the device encountered a stack error.
- * This can be due to stack corruption or exceeding the stack size limit.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
- /**
- * While executing a kernel, the device encountered an illegal instruction.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
- /**
- * While executing a kernel, the device encountered a load or store instruction
- * on a memory address which is not aligned.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_MISALIGNED_ADDRESS = 716,
- /**
- * While executing a kernel, the device encountered an instruction
- * which can only operate on memory locations in certain address spaces
- * (global, shared, or local), but was supplied a memory address not
- * belonging to an allowed address space.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
- /**
- * While executing a kernel, the device program counter wrapped its address space.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_INVALID_PC = 718,
- /**
- * An exception occurred on the device while executing a kernel. Common
- * causes include dereferencing an invalid device pointer and accessing
- * out of bounds shared memory. Less common cases can be system specific - more
- * information about these cases can be found in the system specific user guide.
- * This leaves the process in an inconsistent state and any further CUDA work
- * will return the same error. To continue using CUDA, the process must be terminated
- * and relaunched.
- */
- CUDA_ERROR_LAUNCH_FAILED = 719,
- /**
- * This error indicates that the number of blocks launched per grid for a kernel that was
- * launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice
- * exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor
- * or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
- * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
- */
- CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
- /**
- * An exception occurred on the device while exiting a kernel using tensor memory: the
- * tensor memory was not completely deallocated. This leaves the process in an inconsistent
- * state and any further CUDA work will return the same error. To continue using CUDA, the
- * process must be terminated and relaunched.
- */
- CUDA_ERROR_TENSOR_MEMORY_LEAK = 721,
- /**
- * This error indicates that the attempted operation is not permitted.
- */
- CUDA_ERROR_NOT_PERMITTED = 800,
- /**
- * This error indicates that the attempted operation is not supported
- * on the current system or device.
- */
- CUDA_ERROR_NOT_SUPPORTED = 801,
- /**
- * This error indicates that the system is not yet ready to start any CUDA
- * work. To continue using CUDA, verify the system configuration is in a
- * valid state and all required driver daemons are actively running.
- * More information about this error can be found in the system specific
- * user guide.
- */
- CUDA_ERROR_SYSTEM_NOT_READY = 802,
- /**
- * This error indicates that there is a mismatch between the versions of
- * the display driver and the CUDA driver. Refer to the compatibility documentation
- * for supported versions.
- */
- CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
- /**
- * This error indicates that the system was upgraded to run with forward compatibility
- * but the visible hardware detected by CUDA does not support this configuration.
- * Refer to the compatibility documentation for the supported hardware matrix or ensure
- * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES
- * environment variable.
- */
- CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
- /**
- * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
- */
- CUDA_ERROR_MPS_CONNECTION_FAILED = 805,
- /**
- * This error indicates that the remote procedure call between the MPS server and the MPS client failed.
- */
- CUDA_ERROR_MPS_RPC_FAILURE = 806,
- /**
- * This error indicates that the MPS server is not ready to accept new MPS client requests.
- * This error can be returned when the MPS server is in the process of recovering from a fatal failure.
- */
- CUDA_ERROR_MPS_SERVER_NOT_READY = 807,
- /**
- * This error indicates that the hardware resources required to create MPS client have been exhausted.
- */
- CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808,
- /**
- * This error indicates that the hardware resources required to support device connections have been exhausted.
- */
- CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809,
- /**
- * This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
- */
- CUDA_ERROR_MPS_CLIENT_TERMINATED = 810,
- /**
- * This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
- */
- CUDA_ERROR_CDP_NOT_SUPPORTED = 811,
- /**
- * This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
- */
- CUDA_ERROR_CDP_VERSION_MISMATCH = 812,
- /**
- * This error indicates that the operation is not permitted when
- * the stream is capturing.
- */
- CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,
- /**
- * This error indicates that the current capture sequence on the stream
- * has been invalidated due to a previous error.
- */
- CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901,
- /**
- * This error indicates that the operation would have resulted in a merge
- * of two independent capture sequences.
- */
- CUDA_ERROR_STREAM_CAPTURE_MERGE = 902,
- /**
- * This error indicates that the capture was not initiated in this stream.
- */
- CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903,
- /**
- * This error indicates that the capture sequence contains a fork that was
- * not joined to the primary stream.
- */
- CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904,
- /**
- * This error indicates that a dependency would have been created which
- * crosses the capture sequence boundary. Only implicit in-stream ordering
- * dependencies are allowed to cross the boundary.
- */
- CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905,
- /**
- * This error indicates a disallowed implicit dependency on a current capture
- * sequence from cudaStreamLegacy.
- */
- CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906,
- /**
- * This error indicates that the operation is not permitted on an event which
- * was last recorded in a capturing stream.
- */
- CUDA_ERROR_CAPTURED_EVENT = 907,
- /**
- * A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED
- * argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a
- * different thread.
- */
- CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
- /**
- * This error indicates that the timeout specified for the wait operation has lapsed.
- */
- CUDA_ERROR_TIMEOUT = 909,
- /**
- * This error indicates that the graph update was not performed because it included
- * changes which violated constraints specific to instantiated graph update.
- */
- CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,
- /**
- * This indicates that an async error has occurred in a device outside of CUDA.
- * If CUDA was waiting for an external device's signal before consuming shared data,
- * the external device signaled an error indicating that the data is not valid for
- * consumption. This leaves the process in an inconsistent state and any further CUDA
- * work will return the same error. To continue using CUDA, the process must be
- * terminated and relaunched.
- */
- CUDA_ERROR_EXTERNAL_DEVICE = 911,
- /**
- * Indicates a kernel launch error due to cluster misconfiguration.
- */
- CUDA_ERROR_INVALID_CLUSTER_SIZE = 912,
- /**
- * Indicates a function handle is not loaded when calling an API that requires
- * a loaded function.
- */
- CUDA_ERROR_FUNCTION_NOT_LOADED = 913,
- /**
- * This error indicates one or more resources passed in are not valid resource
- * types for the operation.
- */
- CUDA_ERROR_INVALID_RESOURCE_TYPE = 914,
- /**
- * This error indicates one or more resources are insufficient or non-applicable for
- * the operation.
- */
- CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915,
- /**
- * This error indicates that an error happened during the key rotation
- * sequence.
- */
- CUDA_ERROR_KEY_ROTATION = 916,
- /**
- * This indicates that an unknown internal error has occurred.
- */
- CUDA_ERROR_UNKNOWN = 999
- } CUresult;
- /**
- * Peer-to-peer (P2P) attributes describing the link between two devices.
- */
- typedef enum CUdevice_P2PAttribute_enum {
- CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01, /**< A relative value indicating the performance of the link between two devices */
- CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02, /**< P2P access is enabled */
- CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03, /**< Atomic operations over the link are supported */
- CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x04, /**< \deprecated use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead (same value, 0x04) */
- CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04 /**< Accessing CUDA arrays over the link is supported */
- } CUdevice_P2PAttribute;
- /**
- * CUDA stream callback function type.
- * \param hStream The stream the callback was added to, as passed to ::cuStreamAddCallback. May be NULL.
- * \param status ::CUDA_SUCCESS or any persistent error on the stream.
- * \param userData User parameter provided at registration.
- */
- typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void *userData);
- /**
- * Callback type mapping a block size to the per-block dynamic shared memory
- * needed by a certain kernel.
- *
- * \param blockSize Block size of the kernel.
- *
- * \return The dynamic shared memory needed by a block.
- */
- typedef size_t (CUDA_CB *CUoccupancyB2DSize)(int blockSize);
- /**
- * If set, host memory is portable between CUDA contexts.
- * Flag for ::cuMemHostAlloc()
- */
- #define CU_MEMHOSTALLOC_PORTABLE 0x01
- /**
- * If set, host memory is mapped into CUDA address space and
- * ::cuMemHostGetDevicePointer() may be called on the host pointer.
- * Flag for ::cuMemHostAlloc()
- */
- #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
- /**
- * If set, host memory is allocated as write-combined - fast to write,
- * faster to DMA, slow to read except via the SSE4 streaming load instruction
- * (MOVNTDQA).
- * Flag for ::cuMemHostAlloc()
- */
- #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
- /**
- * If set, host memory is portable between CUDA contexts.
- * Flag for ::cuMemHostRegister()
- */
- #define CU_MEMHOSTREGISTER_PORTABLE 0x01
- /**
- * If set, host memory is mapped into CUDA address space and
- * ::cuMemHostGetDevicePointer() may be called on the host pointer.
- * Flag for ::cuMemHostRegister()
- */
- #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
- /**
- * If set, the passed memory pointer is treated as pointing to some
- * memory-mapped I/O space, e.g. belonging to a third-party PCIe device.
- * On Windows the flag is a no-op.
- * On Linux that memory is marked as non cache-coherent for the GPU and
- * is expected to be physically contiguous. It may return
- * ::CUDA_ERROR_NOT_PERMITTED if run as an unprivileged user, or
- * ::CUDA_ERROR_NOT_SUPPORTED on older Linux kernel versions.
- * On all other platforms, it is not supported and ::CUDA_ERROR_NOT_SUPPORTED
- * is returned.
- * Flag for ::cuMemHostRegister()
- */
- #define CU_MEMHOSTREGISTER_IOMEMORY 0x04
- /**
- * If set, the passed memory pointer is treated as pointing to memory that is
- * considered read-only by the device. On platforms without
- * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is
- * required in order to register memory mapped to the CPU as read-only. Support
- * for the use of this flag can be queried from the device attribute
- * ::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with
- * a current context associated with a device that does not have this attribute
- * set will cause ::cuMemHostRegister to error with ::CUDA_ERROR_NOT_SUPPORTED.
- * Flag for ::cuMemHostRegister()
- */
- #define CU_MEMHOSTREGISTER_READ_ONLY 0x08
- /**
- * 2D memory copy parameters.
- *
- * Describes the source region (src* fields), the destination region
- * (dst* fields) and the extent of the copy (WidthInBytes by Height).
- */
- typedef struct CUDA_MEMCPY2D_st {
- size_t srcXInBytes; /**< Source X in bytes */
- size_t srcY; /**< Source Y */
- CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
- const void *srcHost; /**< Source host pointer */
- CUdeviceptr srcDevice; /**< Source device pointer */
- CUarray srcArray; /**< Source array reference */
- size_t srcPitch; /**< Source pitch (ignored when src is array) */
- size_t dstXInBytes; /**< Destination X in bytes */
- size_t dstY; /**< Destination Y */
- CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
- void *dstHost; /**< Destination host pointer */
- CUdeviceptr dstDevice; /**< Destination device pointer */
- CUarray dstArray; /**< Destination array reference */
- size_t dstPitch; /**< Destination pitch (ignored when dst is array) */
- size_t WidthInBytes; /**< Width of 2D memory copy in bytes */
- size_t Height; /**< Height of 2D memory copy */
- } CUDA_MEMCPY2D_v2;
- typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D;
- /**
- * 3D memory copy parameters.
- *
- * Describes the source region (src* fields), the destination region
- * (dst* fields) and the extent of the copy (WidthInBytes by Height by Depth).
- * The reserved0 and reserved1 members must be NULL.
- */
- typedef struct CUDA_MEMCPY3D_st {
- size_t srcXInBytes; /**< Source X in bytes */
- size_t srcY; /**< Source Y */
- size_t srcZ; /**< Source Z */
- size_t srcLOD; /**< Source LOD */
- CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
- const void *srcHost; /**< Source host pointer */
- CUdeviceptr srcDevice; /**< Source device pointer */
- CUarray srcArray; /**< Source array reference */
- void *reserved0; /**< Must be NULL */
- size_t srcPitch; /**< Source pitch (ignored when src is array) */
- size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */
- size_t dstXInBytes; /**< Destination X in bytes */
- size_t dstY; /**< Destination Y */
- size_t dstZ; /**< Destination Z */
- size_t dstLOD; /**< Destination LOD */
- CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
- void *dstHost; /**< Destination host pointer */
- CUdeviceptr dstDevice; /**< Destination device pointer */
- CUarray dstArray; /**< Destination array reference */
- void *reserved1; /**< Must be NULL */
- size_t dstPitch; /**< Destination pitch (ignored when dst is array) */
- size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
- size_t WidthInBytes; /**< Width of 3D memory copy in bytes */
- size_t Height; /**< Height of 3D memory copy */
- size_t Depth; /**< Depth of 3D memory copy */
- } CUDA_MEMCPY3D_v2;
- typedef CUDA_MEMCPY3D_v2 CUDA_MEMCPY3D;
- /**
- * 3D memory cross-context copy parameters.
- *
- * Same layout of source, destination and extent fields as a 3D copy, with an
- * additional context handle for the source and for the destination.
- */
- typedef struct CUDA_MEMCPY3D_PEER_st {
- size_t srcXInBytes; /**< Source X in bytes */
- size_t srcY; /**< Source Y */
- size_t srcZ; /**< Source Z */
- size_t srcLOD; /**< Source LOD */
- CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
- const void *srcHost; /**< Source host pointer */
- CUdeviceptr srcDevice; /**< Source device pointer */
- CUarray srcArray; /**< Source array reference */
- CUcontext srcContext; /**< Source context (ignored when srcMemoryType is ::CU_MEMORYTYPE_ARRAY) */
- size_t srcPitch; /**< Source pitch (ignored when src is array) */
- size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */
- size_t dstXInBytes; /**< Destination X in bytes */
- size_t dstY; /**< Destination Y */
- size_t dstZ; /**< Destination Z */
- size_t dstLOD; /**< Destination LOD */
- CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
- void *dstHost; /**< Destination host pointer */
- CUdeviceptr dstDevice; /**< Destination device pointer */
- CUarray dstArray; /**< Destination array reference */
- CUcontext dstContext; /**< Destination context (ignored when dstMemoryType is ::CU_MEMORYTYPE_ARRAY) */
- size_t dstPitch; /**< Destination pitch (ignored when dst is array) */
- size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
- size_t WidthInBytes; /**< Width of 3D memory copy in bytes */
- size_t Height; /**< Height of 3D memory copy */
- size_t Depth; /**< Depth of 3D memory copy */
- } CUDA_MEMCPY3D_PEER_v1;
- typedef CUDA_MEMCPY3D_PEER_v1 CUDA_MEMCPY3D_PEER;
- /**
- * Memcpy node parameters: a 3D copy description plus the context the node
- * runs on. The flags and reserved members must be zero.
- */
- typedef struct CUDA_MEMCPY_NODE_PARAMS_st {
- int flags; /**< Must be zero */
- int reserved; /**< Must be zero */
- CUcontext copyCtx; /**< Context on which to run the node */
- CUDA_MEMCPY3D copyParams; /**< Parameters for the memory copy */
- } CUDA_MEMCPY_NODE_PARAMS;
- /**
- * Array descriptor: dimensions, element format and channel count of an array.
- */
- typedef struct CUDA_ARRAY_DESCRIPTOR_st
- {
- size_t Width; /**< Width of array */
- size_t Height; /**< Height of array */
- CUarray_format Format; /**< Array format */
- unsigned int NumChannels; /**< Channels per array element */
- } CUDA_ARRAY_DESCRIPTOR_v2;
- typedef CUDA_ARRAY_DESCRIPTOR_v2 CUDA_ARRAY_DESCRIPTOR;
- /**
- * 3D array descriptor: dimensions, element format, channel count and flags
- * of a 3D array.
- */
- typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
- {
- size_t Width; /**< Width of 3D array */
- size_t Height; /**< Height of 3D array */
- size_t Depth; /**< Depth of 3D array */
- CUarray_format Format; /**< Array format */
- unsigned int NumChannels; /**< Channels per array element */
- unsigned int Flags; /**< Flags */
- } CUDA_ARRAY3D_DESCRIPTOR_v2;
- typedef CUDA_ARRAY3D_DESCRIPTOR_v2 CUDA_ARRAY3D_DESCRIPTOR;
- /**
- * Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers.
- * This is the only non-zero value of the flags member of the sparse properties structure.
- */
- #define CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1
- /**
- * CUDA array sparse properties: tile extent, mip tail location and size, and flags.
- */
- typedef struct CUDA_ARRAY_SPARSE_PROPERTIES_st {
- struct {
- unsigned int width; /**< Width of sparse tile in elements */
- unsigned int height; /**< Height of sparse tile in elements */
- unsigned int depth; /**< Depth of sparse tile in elements */
- } tileExtent;
- /**
- * First mip level at which the mip tail begins.
- */
- unsigned int miptailFirstLevel;
- /**
- * Total size of the mip tail.
- */
- unsigned long long miptailSize;
- /**
- * Flags will either be zero or ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
- */
- unsigned int flags;
- unsigned int reserved[4];
- } CUDA_ARRAY_SPARSE_PROPERTIES_v1;
- typedef CUDA_ARRAY_SPARSE_PROPERTIES_v1 CUDA_ARRAY_SPARSE_PROPERTIES;
- /**
- * CUDA array memory requirements: total size and alignment.
- */
- typedef struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st {
- size_t size; /**< Total required memory size */
- size_t alignment; /**< Alignment requirement */
- unsigned int reserved[4];
- } CUDA_ARRAY_MEMORY_REQUIREMENTS_v1;
- typedef CUDA_ARRAY_MEMORY_REQUIREMENTS_v1 CUDA_ARRAY_MEMORY_REQUIREMENTS;
- /**
- * CUDA resource descriptor.
- *
- * Holds a resource type followed by a union (res) with one member per kind
- * of resource: array, mipmap, linear and pitch2D, plus a reserved member.
- * NOTE(review): presumably resType selects which union member is valid -
- * confirm against ::CUresourcetype.
- */
- typedef struct CUDA_RESOURCE_DESC_st
- {
- CUresourcetype resType; /**< Resource type */
- union {
- struct {
- CUarray hArray; /**< CUDA array */
- } array;
- struct {
- CUmipmappedArray hMipmappedArray; /**< CUDA mipmapped array */
- } mipmap;
- struct {
- CUdeviceptr devPtr; /**< Device pointer */
- CUarray_format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t sizeInBytes; /**< Size in bytes */
- } linear;
- struct {
- CUdeviceptr devPtr; /**< Device pointer */
- CUarray_format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t width; /**< Width of the array in elements */
- size_t height; /**< Height of the array in elements */
- size_t pitchInBytes; /**< Pitch between two rows in bytes */
- } pitch2D;
- struct {
- int reserved[32];
- } reserved;
- } res;
- unsigned int flags; /**< Flags (must be zero) */
- } CUDA_RESOURCE_DESC_v1;
- typedef CUDA_RESOURCE_DESC_v1 CUDA_RESOURCE_DESC;
- /**
- * Texture descriptor: addressing, filtering, mipmap and border-color settings.
- */
- typedef struct CUDA_TEXTURE_DESC_st {
- CUaddress_mode addressMode[3]; /**< Address modes (three entries) */
- CUfilter_mode filterMode; /**< Filter mode */
- unsigned int flags; /**< Flags */
- unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */
- CUfilter_mode mipmapFilterMode; /**< Mipmap filter mode */
- float mipmapLevelBias; /**< Mipmap level bias */
- float minMipmapLevelClamp; /**< Mipmap minimum level clamp */
- float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */
- float borderColor[4]; /**< Border color (four components) */
- int reserved[12];
- } CUDA_TEXTURE_DESC_v1;
- typedef CUDA_TEXTURE_DESC_v1 CUDA_TEXTURE_DESC;
- /**
- * Resource view format.
- *
- * Values run contiguously from 0x00 (no view format) to 0x22, covering
- * integer, floating point and block-compressed formats.
- */
- typedef enum CUresourceViewFormat_enum
- {
- CU_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
- CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
- CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
- CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
- CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
- CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
- CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
- CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
- CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
- CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
- CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
- CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
- CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
- CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
- CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
- CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
- CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
- CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
- CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
- CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
- CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
- CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
- CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */
- CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */
- CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */
- CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */
- CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
- CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */
- CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */
- } CUresourceViewFormat;
- /**
- * Resource view descriptor: view format, dimensions, and the ranges of
- * mipmap levels and layers covered by the view.
- */
- typedef struct CUDA_RESOURCE_VIEW_DESC_st
- {
- CUresourceViewFormat format; /**< Resource view format */
- size_t width; /**< Width of the resource view */
- size_t height; /**< Height of the resource view */
- size_t depth; /**< Depth of the resource view */
- unsigned int firstMipmapLevel; /**< First defined mipmap level */
- unsigned int lastMipmapLevel; /**< Last defined mipmap level */
- unsigned int firstLayer; /**< First layer index */
- unsigned int lastLayer; /**< Last layer index */
- unsigned int reserved[16];
- } CUDA_RESOURCE_VIEW_DESC_v1;
- typedef CUDA_RESOURCE_VIEW_DESC_v1 CUDA_RESOURCE_VIEW_DESC;
- /**
- * Size of the tensor map descriptor, counted in 64-bit words (the number of
- * cuuint64_t elements in the opaque array of ::CUtensorMap).
- */
- #define CU_TENSOR_MAP_NUM_QWORDS 16
- /**
- * Tensor map descriptor. Requires compiler support for aligning to 64 bytes.
- *
- * The alignment specifier is alignas(64) under C++11 or later and
- * _Alignas(64) under C11 or later; with any older language standard no
- * alignment specifier is emitted. __STDC_VERSION__ is tested with defined()
- * first so that compilers which do not define it (C89, or C++ before C++11)
- * do not evaluate an undefined macro in the #elif.
- */
- typedef struct CUtensorMap_st {
- #if defined(__cplusplus) && (__cplusplus >= 201103L)
- alignas(64)
- #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
- _Alignas(64)
- #endif
- cuuint64_t opaque[CU_TENSOR_MAP_NUM_QWORDS];
- } CUtensorMap;
- /**
- * Tensor map data type.
- *
- * Only the first enumerator has an explicit value (0); the remaining ones
- * take consecutive values, so their numeric value follows from their order.
- */
- typedef enum CUtensorMapDataType_enum {
- CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0,
- CU_TENSOR_MAP_DATA_TYPE_UINT16,
- CU_TENSOR_MAP_DATA_TYPE_UINT32,
- CU_TENSOR_MAP_DATA_TYPE_INT32,
- CU_TENSOR_MAP_DATA_TYPE_UINT64,
- CU_TENSOR_MAP_DATA_TYPE_INT64,
- CU_TENSOR_MAP_DATA_TYPE_FLOAT16,
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32,
- CU_TENSOR_MAP_DATA_TYPE_FLOAT64,
- CU_TENSOR_MAP_DATA_TYPE_BFLOAT16,
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ,
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32,
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ,
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B,
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B,
- CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B
- } CUtensorMapDataType;
- /**
- * Tensor map interleave layout type.
- * Enumerators are numbered consecutively starting at 0 (no interleave).
- */
- typedef enum CUtensorMapInterleave_enum {
- CU_TENSOR_MAP_INTERLEAVE_NONE = 0,
- CU_TENSOR_MAP_INTERLEAVE_16B,
- CU_TENSOR_MAP_INTERLEAVE_32B
- } CUtensorMapInterleave;
- /**
- * Tensor map swizzling mode of shared memory banks.
- * Enumerators are numbered consecutively starting at 0 (no swizzling).
- */
- typedef enum CUtensorMapSwizzle_enum {
- CU_TENSOR_MAP_SWIZZLE_NONE = 0,
- CU_TENSOR_MAP_SWIZZLE_32B,
- CU_TENSOR_MAP_SWIZZLE_64B,
- CU_TENSOR_MAP_SWIZZLE_128B,
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B,
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B_FLIP_8B,
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B
- } CUtensorMapSwizzle;
- /**
- * Tensor map L2 promotion type.
- * Enumerators are numbered consecutively starting at 0 (no promotion).
- */
- typedef enum CUtensorMapL2promotion_enum {
- CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
- CU_TENSOR_MAP_L2_PROMOTION_L2_64B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_128B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_256B
- } CUtensorMapL2promotion;
- /**
- * Tensor map out-of-bounds fill type.
- * Two values: 0 (no fill) and 1 (NaN request / zero FMA).
- */
- typedef enum CUtensorMapFloatOOBfill_enum {
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0,
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
- } CUtensorMapFloatOOBfill;
- /**
- * Tensor map Im2Col wide mode.
- * Two values: 0 (W) and 1 (W128).
- */
- typedef enum CUtensorMapIm2ColWideMode_enum {
- CU_TENSOR_MAP_IM2COL_WIDE_MODE_W = 0,
- CU_TENSOR_MAP_IM2COL_WIDE_MODE_W128
- } CUtensorMapIm2ColWideMode;
- /**
- * GPU Direct v3 tokens: a 64-bit P2P token and a 32-bit VA space token.
- */
- typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
- unsigned long long p2pToken;
- unsigned int vaSpaceToken;
- } CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1;
- typedef CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1 CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
- /**
- * Access flags that specify the level of access the current context's device has
- * on the memory referenced. The read-write value (0x3) includes the read bit (0x1).
- */
- typedef enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum {
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0, /**< No access: the device cannot access this memory at all, so it must be staged through accessible memory in order to complete certain operations */
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1, /**< Read-only access: writes to this memory are considered invalid accesses and return an error */
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3 /**< Read-write access: the device has full read-write access to the memory */
- } CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS;
- /**
- * Kernel launch parameters: the kernel, its grid and block dimensions,
- * dynamic shared-memory size, stream and kernel arguments.
- */
- typedef struct CUDA_LAUNCH_PARAMS_st {
- CUfunction function; /**< Kernel to launch */
- unsigned int gridDimX; /**< Width of grid in blocks */
- unsigned int gridDimY; /**< Height of grid in blocks */
- unsigned int gridDimZ; /**< Depth of grid in blocks */
- unsigned int blockDimX; /**< X dimension of each thread block */
- unsigned int blockDimY; /**< Y dimension of each thread block */
- unsigned int blockDimZ; /**< Z dimension of each thread block */
- unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
- CUstream hStream; /**< Stream identifier */
- void **kernelParams; /**< Array of pointers to kernel parameters */
- } CUDA_LAUNCH_PARAMS_v1;
- typedef CUDA_LAUNCH_PARAMS_v1 CUDA_LAUNCH_PARAMS;
- /**
- * External memory handle types.
- *
- * Values are explicit and run from 1 to 8. The last enumerator carries no
- * trailing comma, so the declaration is also valid for compilers that reject
- * a trailing comma in an enumerator list (C89, C++03).
- */
- typedef enum CUexternalMemoryHandleType_enum {
- /**
- * Handle is an opaque file descriptor
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
- /**
- * Handle is an opaque shared NT handle
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
- /**
- * Handle is an opaque, globally shared handle
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
- /**
- * Handle is a D3D12 heap object
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
- /**
- * Handle is a D3D12 committed resource
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
- /**
- * Handle is a shared NT handle to a D3D11 resource
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
- /**
- * Handle is a globally shared handle to a D3D11 resource
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
- /**
- * Handle is an NvSciBuf object
- */
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8
- } CUexternalMemoryHandleType;
- /**
- * Indicates that the external memory object is a dedicated resource.
- */
- #define CUDA_EXTERNAL_MEMORY_DEDICATED 0x1
- /** When the \p flags parameter of ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
- * contains this flag, it indicates that signaling an external semaphore object
- * should skip performing appropriate memory synchronization operations over all
- * the external memory objects that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF,
- * which otherwise are performed by default to ensure data coherency with other
- * importers of the same NvSciBuf memory objects.
- */
- #define CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x01
- /** When the \p flags parameter of ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
- * contains this flag, it indicates that waiting on an external semaphore object
- * should skip performing appropriate memory synchronization operations over all
- * the external memory objects that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF,
- * which otherwise are performed by default to ensure data coherency with other
- * importers of the same NvSciBuf memory objects.
- */
- #define CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC 0x02
- /**
- * When \p flags of ::cuDeviceGetNvSciSyncAttributes is set to this,
- * it indicates that the application needs signaler-specific NvSciSyncAttr
- * to be filled by ::cuDeviceGetNvSciSyncAttributes.
- */
- #define CUDA_NVSCISYNC_ATTR_SIGNAL 0x1
- /**
- * When \p flags of ::cuDeviceGetNvSciSyncAttributes is set to this,
- * it indicates that the application needs waiter-specific NvSciSyncAttr
- * to be filled by ::cuDeviceGetNvSciSyncAttributes.
- */
- #define CUDA_NVSCISYNC_ATTR_WAIT 0x2
- /**
- * External memory handle descriptor
- */
- typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
- /**
- * Type of the handle
- */
- CUexternalMemoryHandleType type;
- union {
- /**
- * File descriptor referencing the memory object. Valid
- * when type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD
- */
- int fd;
- /**
- * Win32 handle referencing the memory object. Valid when
- * type is one of the following:
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
- * Exactly one of 'handle' and 'name' must be non-NULL. If
- * type is one of the following:
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
- * then 'name' must be NULL.
- */
- struct {
- /**
- * Valid NT handle. Must be NULL if 'name' is non-NULL
- */
- void *handle;
- /**
- * Name of a valid memory object.
- * Must be NULL if 'handle' is non-NULL.
- */
- const void *name;
- } win32;
- /**
- * A handle representing an NvSciBuf Object. Valid when type
- * is ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF
- */
- const void *nvSciBufObject;
- } handle;
- /**
- * Size of the memory allocation
- */
- unsigned long long size;
- /**
- * Flags must either be zero or ::CUDA_EXTERNAL_MEMORY_DEDICATED
- */
- unsigned int flags;
- unsigned int reserved[16];
- } CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1;
- typedef CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
- /**
- * External memory buffer descriptor: the offset and size of a buffer
- * within a memory object.
- */
- typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
- /**
- * Offset into the memory object where the buffer's base is
- */
- unsigned long long offset;
- /**
- * Size of the buffer
- */
- unsigned long long size;
- /**
- * Flags reserved for future use. Must be zero.
- */
- unsigned int flags;
- unsigned int reserved[16];
- } CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1;
- typedef CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
- /**
- * External memory mipmap descriptor: where a mipmap chain starts within a
- * memory object, the description of its base level, and its level count.
- */
- typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
- /**
- * Offset into the memory object where the base level of the
- * mipmap chain is.
- */
- unsigned long long offset;
- /**
- * Format, dimension and type of base level of the mipmap chain
- */
- CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
- /**
- * Total number of levels in the mipmap chain
- */
- unsigned int numLevels;
- unsigned int reserved[16];
- } CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1;
- typedef CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
- /**
- * External semaphore handle types.
- * Values are explicit and run from 1 to 10.
- */
- typedef enum CUexternalSemaphoreHandleType_enum {
- /**
- * Handle is an opaque file descriptor
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
- /**
- * Handle is an opaque shared NT handle
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
- /**
- * Handle is an opaque, globally shared handle
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
- /**
- * Handle is a shared NT handle referencing a D3D12 fence object
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
- /**
- * Handle is a shared NT handle referencing a D3D11 fence object
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
- /**
- * Opaque handle to an NvSciSync object
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
- /**
- * Handle is a shared NT handle referencing a D3D11 keyed mutex object
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
- /**
- * Handle is a globally shared handle referencing a D3D11 keyed mutex object
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
- /**
- * Handle is an opaque file descriptor referencing a timeline semaphore
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
- /**
- * Handle is an opaque shared NT handle referencing a timeline semaphore
- */
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10
- } CUexternalSemaphoreHandleType;
- /**
- * External semaphore handle descriptor.
- * The type field selects which member of the handle union is read.
- */
- typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
- /**
- * Type of the handle
- */
- CUexternalSemaphoreHandleType type;
- union {
- /**
- * File descriptor referencing the semaphore object. Valid
- * when type is one of the following:
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD
- */
- int fd;
- /**
- * Win32 handle referencing the semaphore object. Valid when
- * type is one of the following:
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
- *
- * Exactly one of 'handle' and 'name' must be non-NULL. If
- * type is one of the following:
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * - ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
- * then 'name' must be NULL.
- */
- struct {
- /**
- * Valid NT handle. Must be NULL if 'name' is non-NULL
- */
- void *handle;
- /**
- * Name of a valid synchronization primitive.
- * Must be NULL if 'handle' is non-NULL.
- */
- const void *name;
- } win32;
- /**
- * Valid NvSciSyncObj. Must be non-NULL.
- * NOTE(review): presumably used when type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC -- confirm.
- */
- const void* nvSciSyncObj;
- } handle;
- /**
- * Flags reserved for the future. Must be zero.
- */
- unsigned int flags;
- unsigned int reserved[16]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1;
- typedef CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC; /**< Unversioned alias of the _v1 layout */
- /**
- * External semaphore signal parameters.
- * The params member groups one sub-object per kind of semaphore
- * (fence, nvSciSync, keyedMutex).
- */
- typedef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
- struct {
- /**
- * Parameters for fence objects
- */
- struct {
- /**
- * Value of fence to be signaled
- */
- unsigned long long value;
- } fence;
- union {
- /**
- * Pointer to NvSciSyncFence. Valid if ::CUexternalSemaphoreHandleType
- * is of type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC.
- */
- void *fence;
- unsigned long long reserved; /**< Shares storage with fence */
- } nvSciSync;
- /**
- * Parameters for keyed mutex objects
- */
- struct {
- /**
- * Value of key to release the mutex with
- */
- unsigned long long key;
- } keyedMutex;
- unsigned int reserved[12]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } params;
- /**
- * Only when ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS is used to
- * signal a ::CUexternalSemaphore of type
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC, which indicates
- * that while signaling the ::CUexternalSemaphore, no memory synchronization
- * operations should be performed for any external memory object imported
- * as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
- * For all other types of ::CUexternalSemaphore, flags must be zero.
- */
- unsigned int flags;
- unsigned int reserved[16]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1;
- typedef CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS; /**< Unversioned alias of the _v1 layout */
- /**
- * External semaphore wait parameters.
- * The params member groups one sub-object per kind of semaphore
- * (fence, nvSciSync, keyedMutex).
- */
- typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
- struct {
- /**
- * Parameters for fence objects
- */
- struct {
- /**
- * Value of fence to be waited on
- */
- unsigned long long value;
- } fence;
- /**
- * Parameters for NvSciSync objects. The fence member is a pointer to
- * an NvSciSyncFence and is valid if ::CUexternalSemaphoreHandleType
- * is of type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC.
- */
- union {
- void *fence;
- unsigned long long reserved; /**< Shares storage with fence */
- } nvSciSync;
- /**
- * Parameters for keyed mutex objects
- */
- struct {
- /**
- * Value of key to acquire the mutex with
- */
- unsigned long long key;
- /**
- * Timeout in milliseconds to wait to acquire the mutex
- */
- unsigned int timeoutMs;
- } keyedMutex;
- unsigned int reserved[10]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } params;
- /**
- * Only when ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS is used to wait on
- * a ::CUexternalSemaphore of type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC,
- * the valid flag is ::CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC,
- * which indicates that while waiting for the ::CUexternalSemaphore, no memory
- * synchronization operations should be performed for any external memory
- * object imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
- * For all other types of ::CUexternalSemaphore, flags must be zero.
- */
- unsigned int flags;
- unsigned int reserved[16]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1;
- typedef CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS; /**< Unversioned alias of the _v1 layout */
- /**
- * Semaphore signal node parameters (v1 layout).
- * extSemArray and paramsArray are parallel arrays of numExtSems entries.
- */
- typedef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {
- CUexternalSemaphore* extSemArray; /**< Array of external semaphore handles. */
- const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray; /**< Array of external semaphore signal parameters. */
- unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
- } CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1;
- typedef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1 CUDA_EXT_SEM_SIGNAL_NODE_PARAMS; /**< Unversioned alias of the _v1 layout */
- /**
- * Semaphore signal node parameters (v2 layout).
- * As declared here, the members match ::CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1
- * field for field. This is the type embedded in ::CUgraphNodeParams.
- */
- typedef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st {
- CUexternalSemaphore* extSemArray; /**< Array of external semaphore handles. */
- const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray; /**< Array of external semaphore signal parameters. */
- unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
- } CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2;
- /**
- * Semaphore wait node parameters (v1 layout).
- * extSemArray and paramsArray are parallel arrays of numExtSems entries.
- */
- typedef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {
- CUexternalSemaphore* extSemArray; /**< Array of external semaphore handles. */
- const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray; /**< Array of external semaphore wait parameters. */
- unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
- } CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1;
- typedef CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1 CUDA_EXT_SEM_WAIT_NODE_PARAMS; /**< Unversioned alias of the _v1 layout */
- /**
- * Semaphore wait node parameters (v2 layout).
- * As declared here, the members match ::CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1
- * field for field. This is the type embedded in ::CUgraphNodeParams.
- */
- typedef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st {
- CUexternalSemaphore* extSemArray; /**< Array of external semaphore handles. */
- const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray; /**< Array of external semaphore wait parameters. */
- unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
- } CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2;
- typedef unsigned long long CUmemGenericAllocationHandle_v1; /**< Generic memory allocation handle, represented as an unsigned long long value */
- typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle; /**< Unversioned alias of the _v1 handle type */
- /**
- * Flags for specifying particular handle types.
- * The non-zero handle types occupy distinct bits, so several of them can be
- * expressed as a bitmask (see for example ::CUmulticastObjectProp::handleTypes).
- */
- typedef enum CUmemAllocationHandleType_enum {
- CU_MEM_HANDLE_TYPE_NONE = 0x0, /**< Does not allow any export mechanism. */
- CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1, /**< Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int) */
- CU_MEM_HANDLE_TYPE_WIN32 = 0x2, /**< Allows a Win32 NT handle to be used for exporting. (HANDLE) */
- CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4, /**< Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE) */
- CU_MEM_HANDLE_TYPE_FABRIC = 0x8, /**< Allows a fabric handle to be used for exporting. (CUmemFabricHandle) */
- CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF /**< Upper sentinel; presumably not a usable handle type -- TODO confirm */
- } CUmemAllocationHandleType;
- /**
- * Specifies the memory protection flags for mapping.
- * Note that the read-write value (0x3) includes the read bit (0x1).
- */
- typedef enum CUmemAccess_flags_enum {
- CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0, /**< Default, make the address range not accessible */
- CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1, /**< Make the address range read accessible */
- CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3, /**< Make the address range read-write accessible */
- CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF /**< Upper sentinel; presumably not a usable protection value -- TODO confirm */
- } CUmemAccess_flags;
- /**
- * Specifies the type of location.
- * The type determines how the id field of ::CUmemLocation is interpreted.
- */
- typedef enum CUmemLocationType_enum {
- CU_MEM_LOCATION_TYPE_INVALID = 0x0, /**< Invalid location type */
- CU_MEM_LOCATION_TYPE_DEVICE = 0x1, /**< Location is a device location, thus id is a device ordinal */
- CU_MEM_LOCATION_TYPE_HOST = 0x2, /**< Location is host, id is ignored */
- CU_MEM_LOCATION_TYPE_HOST_NUMA = 0x3, /**< Location is a host NUMA node, thus id is a host NUMA node id */
- CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT = 0x4, /**< Location is a host NUMA node of the current thread, id is ignored */
- CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF /**< Upper sentinel; presumably not a usable location type -- TODO confirm */
- } CUmemLocationType;
- /**
- * Defines the allocation types available
- */
- typedef enum CUmemAllocationType_enum {
- CU_MEM_ALLOCATION_TYPE_INVALID = 0x0, /**< Invalid allocation type */
- /** This allocation type is 'pinned', i.e. it cannot migrate from its current
- * location while the application is actively using it
- */
- CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
- CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF /**< Upper sentinel; presumably not a usable allocation type -- TODO confirm */
- } CUmemAllocationType;
- /**
- * Flag selecting which allocation granularity is requested: the minimum
- * required one or the one recommended for best performance.
- */
- typedef enum CUmemAllocationGranularity_flags_enum {
- CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0, /**< Minimum required granularity for allocation */
- CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1 /**< Recommended granularity for allocation for best performance */
- } CUmemAllocationGranularity_flags;
- /**
- * Specifies the handle type for an address range
- */
- typedef enum CUmemRangeHandleType_enum
- {
- CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = 0x1, /**< By its name, a dma-buf file descriptor handle -- TODO confirm */
- CU_MEM_RANGE_HANDLE_TYPE_MAX = 0x7FFFFFFF /**< Upper sentinel; presumably not a usable handle type -- TODO confirm */
- } CUmemRangeHandleType;
- /**
- * Flags that can be supplied when requesting a handle for an address range.
- */
- typedef enum CUmemRangeFlags_enum {
- CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE = 0x1 /**< Indicates that the DMA_BUF handle should be mapped via PCIe BAR1 */
- } CUmemRangeFlags;
- /**
- * Sparse subresource types.
- * Stored in ::CUarrayMapInfo::subresourceType.
- */
- typedef enum CUarraySparseSubresourceType_enum {
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0, /**< Presumably selects the sparseLevel member of ::CUarrayMapInfo::subresource -- TODO confirm */
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1 /**< Presumably selects the miptail member of ::CUarrayMapInfo::subresource -- TODO confirm */
- } CUarraySparseSubresourceType;
- /**
- * Memory operation types.
- * Stored in ::CUarrayMapInfo::memOperationType.
- */
- typedef enum CUmemOperationType_enum {
- CU_MEM_OPERATION_TYPE_MAP = 1, /**< Map operation */
- CU_MEM_OPERATION_TYPE_UNMAP = 2 /**< Unmap operation */
- } CUmemOperationType;
- /**
- * Memory handle types.
- * Stored in ::CUarrayMapInfo::memHandleType.
- */
- typedef enum CUmemHandleType_enum {
- CU_MEM_HANDLE_TYPE_GENERIC = 0 /**< Generic handle; presumably corresponds to ::CUmemGenericAllocationHandle -- TODO confirm */
- } CUmemHandleType;
- /**
- * Specifies the CUDA array or CUDA mipmapped array memory mapping information
- */
- typedef struct CUarrayMapInfo_st {
- CUresourcetype resourceType; /**< Resource type */
- union {
- CUmipmappedArray mipmap; /**< CUDA mipmapped array; presumably used when resourceType denotes a mipmapped array -- TODO confirm */
- CUarray array; /**< CUDA array; presumably used when resourceType denotes an array -- TODO confirm */
- } resource;
- CUarraySparseSubresourceType subresourceType; /**< Sparse subresource type */
- union {
- struct {
- unsigned int level; /**< For CUDA mipmapped arrays must be a valid mipmap level. For CUDA arrays must be zero */
- unsigned int layer; /**< For CUDA layered arrays must be a valid layer index. Otherwise, must be zero */
- unsigned int offsetX; /**< Starting X offset in elements */
- unsigned int offsetY; /**< Starting Y offset in elements */
- unsigned int offsetZ; /**< Starting Z offset in elements */
- unsigned int extentWidth; /**< Width in elements */
- unsigned int extentHeight; /**< Height in elements */
- unsigned int extentDepth; /**< Depth in elements */
- } sparseLevel;
- struct {
- unsigned int layer; /**< For CUDA layered arrays must be a valid layer index. Otherwise, must be zero */
- unsigned long long offset; /**< Offset within mip tail */
- unsigned long long size; /**< Extent in bytes */
- } miptail;
- } subresource;
-
- CUmemOperationType memOperationType; /**< Memory operation type */
- CUmemHandleType memHandleType; /**< Memory handle type */
- union {
- CUmemGenericAllocationHandle memHandle; /**< Generic allocation handle (the only member of this union) */
- } memHandle;
-
- unsigned long long offset; /**< Offset within the memory */
- unsigned int deviceBitMask; /**< Device ordinal bit mask */
- unsigned int flags; /**< Flags for future use, must be zero now. */
- unsigned int reserved[2]; /**< Reserved for future use, must be zero now. */
- } CUarrayMapInfo_v1;
- typedef CUarrayMapInfo_v1 CUarrayMapInfo; /**< Unversioned alias of the _v1 layout */
- /**
- * Specifies a memory location.
- */
- typedef struct CUmemLocation_st {
- CUmemLocationType type; /**< Specifies the location type, which modifies the meaning of id. */
- int id; /**< Identifier for the location, interpreted according to this location's ::CUmemLocationType. */
- } CUmemLocation_v1;
- typedef CUmemLocation_v1 CUmemLocation; /**< Unversioned alias of the _v1 layout */
- /**
- * Specifies the compression attribute for an allocation.
- */
- typedef enum CUmemAllocationCompType_enum {
- CU_MEM_ALLOCATION_COMP_NONE = 0x0, /**< Allocating non-compressible memory */
- CU_MEM_ALLOCATION_COMP_GENERIC = 0x1 /**< Allocating compressible memory */
- } CUmemAllocationCompType;
- /**
- * This flag, if set, indicates that the memory will be used as a tile pool.
- * NOTE(review): presumably a bit for the usage bitmask of
- * ::CUmemAllocationProp -- confirm.
- */
- #define CU_MEM_CREATE_USAGE_TILE_POOL 0x1
- /**
- * This flag, if set, indicates that the memory will be used as a buffer for
- * hardware accelerated decompression.
- * NOTE(review): presumably a bit for the usage bitmask of
- * ::CUmemAllocationProp -- confirm.
- */
- #define CU_MEM_CREATE_USAGE_HW_DECOMPRESS 0x2
- /**
- * Specifies the allocation properties for an allocation.
- */
- typedef struct CUmemAllocationProp_st {
- /** Allocation type */
- CUmemAllocationType type;
- /** Requested handle types, see ::CUmemAllocationHandleType */
- CUmemAllocationHandleType requestedHandleTypes;
- /** Location of allocation */
- CUmemLocation location;
- /**
- * Windows-specific POBJECT_ATTRIBUTES required when
- * ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes structure
- * includes security attributes that define
- * the scope of which exported allocations may be transferred to other
- * processes. In all other cases, this field is required to be zero.
- */
- void *win32HandleMetaData;
- struct {
- /**
- * Allocation hint for requesting compressible memory.
- * On devices that support Compute Data Compression, compressible
- * memory can be used to accelerate accesses to data with unstructured
- * sparsity and other compressible data patterns. Applications are
- * expected to query the allocation properties of the handle obtained with
- * ::cuMemCreate using ::cuMemGetAllocationPropertiesFromHandle to
- * validate whether the obtained allocation is compressible or not. Note that
- * compressed memory may not be mappable on all devices.
- * NOTE(review): values are presumably taken from
- * ::CUmemAllocationCompType -- confirm.
- */
- unsigned char compressionType;
- unsigned char gpuDirectRDMACapable; /**< NOTE(review): undocumented here; by its name, relates to GPUDirect RDMA capability of the allocation -- confirm */
- /** Bitmask indicating intended usage for this allocation */
- unsigned short usage;
- unsigned char reserved[4]; /**< Reserved. Presumably must be zero -- TODO confirm */
- } allocFlags;
- } CUmemAllocationProp_v1;
- typedef CUmemAllocationProp_v1 CUmemAllocationProp; /**< Unversioned alias of the _v1 layout */
- /**
- * Flags for querying different granularities for a multicast object:
- * either the minimum required one or the one recommended for best performance.
- */
- typedef enum CUmulticastGranularity_flags_enum {
- CU_MULTICAST_GRANULARITY_MINIMUM = 0x0, /**< Minimum required granularity */
- CU_MULTICAST_GRANULARITY_RECOMMENDED = 0x1 /**< Recommended granularity for best performance */
- } CUmulticastGranularity_flags;
- /**
- * Specifies the properties for a multicast object.
- */
- typedef struct CUmulticastObjectProp_st {
- /**
- * The number of devices in the multicast team that will bind memory to this
- * object
- */
- unsigned int numDevices;
- /**
- * The maximum amount of memory that can be bound to this multicast object
- * per device
- */
- size_t size;
- /**
- * Bitmask of exportable handle types (see ::CUmemAllocationHandleType) for
- * this object
- */
- unsigned long long handleTypes;
- /**
- * Flags for future use, must be zero now
- */
- unsigned long long flags;
- } CUmulticastObjectProp_v1;
- typedef CUmulticastObjectProp_v1 CUmulticastObjectProp; /**< Unversioned alias of the _v1 layout */
- /**
- * Memory access descriptor: pairs a location with the access flags requested
- * for it.
- */
- typedef struct CUmemAccessDesc_st {
- CUmemLocation location; /**< Location on which the request is to change its accessibility */
- CUmemAccess_flags flags; /**< ::CUmemAccess_flags accessibility flags to set on the request */
- } CUmemAccessDesc_v1;
- typedef CUmemAccessDesc_v1 CUmemAccessDesc; /**< Unversioned alias of the _v1 layout */
- /**
- * CUDA Graph Update result / error types.
- * Reported through ::CUgraphExecUpdateResultInfo::result.
- */
- typedef enum CUgraphExecUpdateResult_enum {
- CU_GRAPH_EXEC_UPDATE_SUCCESS = 0x0, /**< The update succeeded */
- CU_GRAPH_EXEC_UPDATE_ERROR = 0x1, /**< The update failed for an unexpected reason which is described in the return value of the function */
- CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 0x2, /**< The update failed because the topology changed */
- CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 0x3, /**< The update failed because a node type changed */
- CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 0x4, /**< The update failed because the function of a kernel node changed (CUDA driver < 11.2) */
- CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 0x5, /**< The update failed because the parameters changed in a way that is not supported */
- CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 0x6, /**< The update failed because something about the node is not supported */
- CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7, /**< The update failed because the function of a kernel node changed in an unsupported way */
- CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED = 0x8 /**< The update failed because the node attributes changed in a way that is not supported */
- } CUgraphExecUpdateResult;
- /**
- * Result information returned by ::cuGraphExecUpdate
- */
- typedef struct CUgraphExecUpdateResultInfo_st {
- /**
- * Gives more specific detail when a CUDA graph update fails.
- */
- CUgraphExecUpdateResult result;
- /**
- * The "to node" of the error edge when the topologies do not match.
- * The error node when the error is associated with a specific node.
- * NULL when the error is generic.
- */
- CUgraphNode errorNode;
- /**
- * The "from node" of the error edge when the topologies do not match.
- * Otherwise NULL.
- */
- CUgraphNode errorFromNode;
- } CUgraphExecUpdateResultInfo_v1;
- typedef CUgraphExecUpdateResultInfo_v1 CUgraphExecUpdateResultInfo; /**< Unversioned alias of the _v1 layout */
- /**
- * CUDA memory pool attributes.
- * The first enumerator is explicitly 1; the following ones take consecutive
- * values. Each entry notes the C type of the attribute's value.
- */
- typedef enum CUmemPool_attribute_enum {
- /**
- * (value type = int)
- * Allow ::cuMemAllocAsync to use memory asynchronously freed
- * in other streams as long as a stream ordering dependency
- * of the allocating stream on the free action exists.
- * CUDA events and null stream interactions can create the required
- * stream ordered dependencies. (default enabled)
- */
- CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
- /**
- * (value type = int)
- * Allow reuse of already completed frees when there is no dependency
- * between the free and allocation. (default enabled)
- */
- CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
- /**
- * (value type = int)
- * Allow ::cuMemAllocAsync to insert new stream dependencies
- * in order to establish the stream ordering required to reuse
- * a piece of memory released by ::cuMemFreeAsync (default enabled).
- */
- CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
- /**
- * (value type = cuuint64_t)
- * Amount of reserved memory in bytes to hold onto before trying
- * to release memory back to the OS. When more than the release
- * threshold bytes of memory are held by the memory pool, the
- * allocator will try to release memory back to the OS on the
- * next call to stream, event or context synchronize. (default 0)
- */
- CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
- /**
- * (value type = cuuint64_t)
- * Amount of backing memory currently allocated for the mempool.
- */
- CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
- /**
- * (value type = cuuint64_t)
- * High watermark of backing memory allocated for the mempool since the
- * last time it was reset. High watermark can only be reset to zero.
- */
- CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
- /**
- * (value type = cuuint64_t)
- * Amount of memory from the pool that is currently in use by the application.
- */
- CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
- /**
- * (value type = cuuint64_t)
- * High watermark of the amount of memory from the pool that was in use by the application since
- * the last time it was reset. High watermark can only be reset to zero.
- */
- CU_MEMPOOL_ATTR_USED_MEM_HIGH
- } CUmemPool_attribute;
- /**
- * This flag, if set, indicates that the memory will be used as a buffer for
- * hardware accelerated decompression.
- * NOTE(review): presumably a bit for the usage bitmask of
- * ::CUmemPoolProps -- confirm.
- */
- #define CU_MEM_POOL_CREATE_USAGE_HW_DECOMPRESS 0x2
- /**
- * Specifies the properties of allocations made from the pool.
- */
- typedef struct CUmemPoolProps_st {
- CUmemAllocationType allocType; /**< Allocation type. Currently must be specified as ::CU_MEM_ALLOCATION_TYPE_PINNED */
- CUmemAllocationHandleType handleTypes; /**< Handle types that will be supported by allocations from the pool. */
- CUmemLocation location; /**< Location where allocations should reside. */
- /**
- * Windows-specific LPSECURITYATTRIBUTES required when
- * ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute defines
- * the scope of which exported allocations may be transferred to other
- * processes. In all other cases, this field is required to be zero.
- */
- void *win32SecurityAttributes;
- size_t maxSize; /**< Maximum pool size. When set to 0, defaults to a system dependent value. */
- unsigned short usage; /**< Bitmask indicating intended usage for the pool. */
- unsigned char reserved[54]; /**< Reserved for future use, must be 0 */
- } CUmemPoolProps_v1;
- typedef CUmemPoolProps_v1 CUmemPoolProps; /**< Unversioned alias of the _v1 layout */
- /**
- * Opaque data for exporting a pool allocation.
- * The contents are a 64-byte blob with no fields exposed to the caller.
- */
- typedef struct CUmemPoolPtrExportData_st {
- unsigned char reserved[64]; /**< Opaque payload bytes */
- } CUmemPoolPtrExportData_v1;
- typedef CUmemPoolPtrExportData_v1 CUmemPoolPtrExportData; /**< Unversioned alias of the _v1 layout */
- /**
- * Memory allocation node parameters (v1 layout).
- * Members marked "in" are supplied by the caller; dptr is an output.
- */
- typedef struct CUDA_MEM_ALLOC_NODE_PARAMS_v1_st {
- /**
- * in: location where the allocation should reside (specified in ::location).
- * ::handleTypes must be ::CU_MEM_HANDLE_TYPE_NONE. IPC is not supported.
- */
- CUmemPoolProps poolProps;
- const CUmemAccessDesc *accessDescs; /**< in: array of memory access descriptors. Used to describe peer GPU access */
- size_t accessDescCount; /**< in: number of memory access descriptors. Must not exceed the number of GPUs. */
- size_t bytesize; /**< in: size in bytes of the requested allocation */
- CUdeviceptr dptr; /**< out: address of the allocation returned by CUDA */
- } CUDA_MEM_ALLOC_NODE_PARAMS_v1;
- typedef CUDA_MEM_ALLOC_NODE_PARAMS_v1 CUDA_MEM_ALLOC_NODE_PARAMS; /**< Unversioned alias of the _v1 layout */
- /**
- * Memory allocation node parameters (v2 layout).
- * As declared here, the members match ::CUDA_MEM_ALLOC_NODE_PARAMS_v1 field
- * for field. This is the type embedded in ::CUgraphNodeParams.
- */
- typedef struct CUDA_MEM_ALLOC_NODE_PARAMS_v2_st {
- /**
- * in: location where the allocation should reside (specified in ::location).
- * ::handleTypes must be ::CU_MEM_HANDLE_TYPE_NONE. IPC is not supported.
- */
- CUmemPoolProps poolProps;
- const CUmemAccessDesc *accessDescs; /**< in: array of memory access descriptors. Used to describe peer GPU access */
- size_t accessDescCount; /**< in: number of memory access descriptors. Must not exceed the number of GPUs. */
- size_t bytesize; /**< in: size in bytes of the requested allocation */
- CUdeviceptr dptr; /**< out: address of the allocation returned by CUDA */
- } CUDA_MEM_ALLOC_NODE_PARAMS_v2;
- /**
- * Memory free node parameters.
- * Carries the single device pointer that the node frees.
- */
- typedef struct CUDA_MEM_FREE_NODE_PARAMS_st {
- CUdeviceptr dptr; /**< in: the pointer to free */
- } CUDA_MEM_FREE_NODE_PARAMS;
- typedef enum CUgraphMem_attribute_enum { /* Graph memory attributes; every value is reported as a cuuint64_t byte count */
- /**
- * (value type = cuuint64_t)
- * Amount of memory, in bytes, currently associated with graphs
- */
- CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT,
- /**
- * (value type = cuuint64_t)
- * High watermark of memory, in bytes, associated with graphs since the
- * last time it was reset. High watermark can only be reset to zero.
- */
- CU_GRAPH_MEM_ATTR_USED_MEM_HIGH,
- /**
- * (value type = cuuint64_t)
- * Amount of memory, in bytes, currently allocated for use by
- * the CUDA graphs asynchronous allocator.
- */
- CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT,
- /**
- * (value type = cuuint64_t)
- * High watermark of memory, in bytes, allocated for use by
- * the CUDA graphs asynchronous allocator.
- */
- CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH
- } CUgraphMem_attribute;
- /**
- * Child graph node parameters.
- * The meaning of the graph member differs between node creation and node query.
- */
- typedef struct CUDA_CHILD_GRAPH_NODE_PARAMS_st {
- CUgraph graph; /**< For node creation: the child graph to clone into the node.
- For node query: a handle to the graph owned by the node */
- } CUDA_CHILD_GRAPH_NODE_PARAMS;
- /**
- * Event record node parameters.
- * Embedded in ::CUgraphNodeParams as the eventRecord member.
- */
- typedef struct CUDA_EVENT_RECORD_NODE_PARAMS_st {
- CUevent event; /**< The event to record when the node executes */
- } CUDA_EVENT_RECORD_NODE_PARAMS;
- /**
- * Event wait node parameters.
- * Embedded in ::CUgraphNodeParams as the eventWait member.
- */
- typedef struct CUDA_EVENT_WAIT_NODE_PARAMS_st {
- CUevent event; /**< The event to wait on from the node */
- } CUDA_EVENT_WAIT_NODE_PARAMS;
- /**
- * Graph node parameters. See ::cuGraphAddNode.
- * The per-node-kind parameter structs share storage in an anonymous union,
- * padded by reserved1.
- * NOTE(review): presumably the type member tells which union member is
- * meaningful -- confirm against ::cuGraphAddNode.
- */
- typedef struct CUgraphNodeParams_st {
- CUgraphNodeType type; /**< Type of the node */
- int reserved0[3]; /**< Reserved. Must be zero. */
- union {
- long long reserved1[29]; /**< Padding. Unused bytes must be zero. */
- CUDA_KERNEL_NODE_PARAMS_v3 kernel; /**< Kernel node parameters. */
- CUDA_MEMCPY_NODE_PARAMS memcpy; /**< Memcpy node parameters. */
- CUDA_MEMSET_NODE_PARAMS_v2 memset; /**< Memset node parameters. */
- CUDA_HOST_NODE_PARAMS_v2 host; /**< Host node parameters. */
- CUDA_CHILD_GRAPH_NODE_PARAMS graph; /**< Child graph node parameters. */
- CUDA_EVENT_WAIT_NODE_PARAMS eventWait; /**< Event wait node parameters. */
- CUDA_EVENT_RECORD_NODE_PARAMS eventRecord; /**< Event record node parameters. */
- CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2 extSemSignal; /**< External semaphore signal node parameters. */
- CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2 extSemWait; /**< External semaphore wait node parameters. */
- CUDA_MEM_ALLOC_NODE_PARAMS_v2 alloc; /**< Memory allocation node parameters. */
- CUDA_MEM_FREE_NODE_PARAMS free; /**< Memory free node parameters. */
- CUDA_BATCH_MEM_OP_NODE_PARAMS_v2 memOp; /**< MemOp node parameters. */
- CUDA_CONDITIONAL_NODE_PARAMS conditional; /**< Conditional node parameters. */
- };
- long long reserved2; /**< Reserved bytes. Must be zero. */
- } CUgraphNodeParams;
- /**
- * If set, each kernel launched as part of ::cuLaunchCooperativeKernelMultiDevice only
- * waits for prior work in the stream corresponding to that GPU to complete before the
- * kernel begins execution.
- * Uses a different bit (0x01) than
- * ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC (0x02).
- */
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
- /**
- * If set, any subsequent work pushed in a stream that participated in a call to
- * ::cuLaunchCooperativeKernelMultiDevice will only wait for the kernel launched on
- * the GPU corresponding to that stream to complete before it begins execution.
- * Uses a different bit (0x02) than
- * ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC (0x01).
- */
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
- /**
- * If set, the CUDA array is a collection of layers, where each layer is either a 1D
- * or a 2D array and the Depth member of ::CUDA_ARRAY3D_DESCRIPTOR specifies the number
- * of layers, not the depth of a 3D array.
- */
- #define CUDA_ARRAY3D_LAYERED 0x01
- /**
- * Deprecated, use ::CUDA_ARRAY3D_LAYERED (same value, 0x01)
- */
- #define CUDA_ARRAY3D_2DARRAY 0x01
- /**
- * This flag must be set in order to bind a surface reference
- * to the CUDA array
- */
- #define CUDA_ARRAY3D_SURFACE_LDST 0x02
- /**
- * If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The
- * width of such a CUDA array must be equal to its height, and Depth must be six.
- * If the ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps
- * and Depth must be a multiple of six.
- */
- #define CUDA_ARRAY3D_CUBEMAP 0x04
- /**
- * This flag must be set in order to perform texture gather operations
- * on a CUDA array.
- */
- #define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
- /**
- * This flag, if set, indicates that the CUDA
- * array is a DEPTH_TEXTURE.
- */
- #define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
- /**
- * This flag indicates that the CUDA array may be bound as a color target
- * in an external graphics API
- */
- #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
- /**
- * This flag, if set, indicates that the CUDA array or CUDA mipmapped array
- * is a sparse CUDA array or CUDA mipmapped array respectively
- */
- #define CUDA_ARRAY3D_SPARSE 0x40
- /**
- * This flag, if set, indicates that the CUDA array or CUDA mipmapped array
- * will allow deferred memory mapping
- */
- #define CUDA_ARRAY3D_DEFERRED_MAPPING 0x80
- /**
- * This flag indicates that the CUDA array will be used for hardware accelerated
- * video encode/decode operations.
- */
- #define CUDA_ARRAY3D_VIDEO_ENCODE_DECODE 0x100
- /**
- * Override the texref format with a format inferred from the array.
- * Flag for ::cuTexRefSetArray()
- */
- #define CU_TRSA_OVERRIDE_FORMAT 0x01
- /**
- * Read the texture as integers rather than promoting the values to floats
- * in the range [0,1].
- * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate()
- */
- #define CU_TRSF_READ_AS_INTEGER 0x01
- /**
- * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
- * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate()
- */
- #define CU_TRSF_NORMALIZED_COORDINATES 0x02
- /**
- * Perform sRGB-to-linear conversion during texture read.
- * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate()
- */
- #define CU_TRSF_SRGB 0x10
- /**
- * Disable any trilinear filtering optimizations.
- * Flag for ::cuTexRefSetFlags() and ::cuTexObjectCreate()
- */
- #define CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
- /**
- * Enable seamless cube map filtering.
- * Flag for ::cuTexObjectCreate()
- */
- #define CU_TRSF_SEAMLESS_CUBEMAP 0x40
- /**
- * C++ compile time constant for ::CU_LAUNCH_PARAM_END
- * (the integer that ::CU_LAUNCH_PARAM_END casts to void*)
- */
- #define CU_LAUNCH_PARAM_END_AS_INT 0x00
- /**
- * End of array terminator for the \p extra parameter to
- * ::cuLaunchKernel
- */
- #define CU_LAUNCH_PARAM_END ((void*)CU_LAUNCH_PARAM_END_AS_INT)
- /**
- * C++ compile time constant for ::CU_LAUNCH_PARAM_BUFFER_POINTER
- * (the integer that ::CU_LAUNCH_PARAM_BUFFER_POINTER casts to void*)
- */
- #define CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT 0x01
- /**
- * Indicator that the next value in the \p extra parameter to
- * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
- * parameters used for launching kernel \p f. This buffer needs to
- * honor all alignment/padding requirements of the individual parameters.
- * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
- * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no
- * effect.
- */
- #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT)
- /**
- * C++ compile time constant for ::CU_LAUNCH_PARAM_BUFFER_SIZE
- * (the integer that ::CU_LAUNCH_PARAM_BUFFER_SIZE casts to void*)
- */
- #define CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT 0x02
- /**
- * Indicator that the next value in the \p extra parameter to
- * ::cuLaunchKernel will be a pointer to a size_t which contains the
- * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER.
- * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
- * in the \p extra array if the value associated with
- * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
- */
- #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT)
- /**
- * For texture references loaded into the module, use default texunit from
- * texture reference.
- * The value is parenthesized so that the macro always expands as a single
- * operand, whatever operators surround it at the point of use.
- */
- #define CU_PARAM_TR_DEFAULT (-1)
- /**
- * Device that represents the CPU (the value -1 cast to ::CUdevice)
- */
- #define CU_DEVICE_CPU ((CUdevice)-1)
- /**
- * Device that represents an invalid device (the value -2 cast to ::CUdevice)
- */
- #define CU_DEVICE_INVALID ((CUdevice)-2)
- /**
- * Bitmasks for ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS.
- * Each enumerator is a single distinct bit.
- */
- typedef enum CUflushGPUDirectRDMAWritesOptions_enum {
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1<<0, /**< ::cuFlushGPUDirectRDMAWrites() and its CUDA Runtime API counterpart are supported on the device. */
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 1<<1 /**< The ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. */
- } CUflushGPUDirectRDMAWritesOptions;
- /**
- * Platform native ordering for GPUDirect RDMA writes.
- * The non-zero values (100, 200) equal the corresponding
- * ::CUflushGPUDirectRDMAWritesScope values declared in this header.
- */
- typedef enum CUGPUDirectRDMAWritesOrdering_enum {
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0, /**< The device does not natively support ordering of remote writes. ::cuFlushGPUDirectRDMAWrites() can be leveraged if supported. */
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100, /**< Natively, the device can consistently consume remote writes, although other CUDA devices may not. */
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200 /**< Any CUDA device in the system can consistently consume remote writes to this device. */
- } CUGPUDirectRDMAWritesOrdering;
- /**
- * The scopes for ::cuFlushGPUDirectRDMAWrites
- */
- typedef enum CUflushGPUDirectRDMAWritesScope_enum {
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100, /**< Blocks until remote writes are visible to the CUDA device context owning the data. */
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200 /**< Blocks until remote writes are visible to all CUDA device contexts. */
- } CUflushGPUDirectRDMAWritesScope;
-
- /**
- * The targets for ::cuFlushGPUDirectRDMAWrites
- */
- typedef enum CUflushGPUDirectRDMAWritesTarget_enum {
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0 /**< Sets the target for ::cuFlushGPUDirectRDMAWrites() to the currently active CUDA device context. */
- } CUflushGPUDirectRDMAWritesTarget;
- /**
- * The additional write options for ::cuGraphDebugDotPrint
- */
- typedef enum CUgraphDebugDot_flags_enum {
- CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1<<0, /**< Output all debug data as if every debug flag is enabled */
- CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 1<<1, /**< Use CUDA Runtime structures for output */
- CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 1<<2, /**< Adds CUDA_KERNEL_NODE_PARAMS values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 1<<3, /**< Adds CUDA_MEMCPY3D values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 1<<4, /**< Adds CUDA_MEMSET_NODE_PARAMS values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 1<<5, /**< Adds CUDA_HOST_NODE_PARAMS values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 1<<6, /**< Adds CUevent handle from record and wait nodes to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 1<<7, /**< Adds CUDA_EXT_SEM_SIGNAL_NODE_PARAMS values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 1<<8, /**< Adds CUDA_EXT_SEM_WAIT_NODE_PARAMS values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 1<<9, /**< Adds CUkernelNodeAttrValue values to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1<<10, /**< Adds node handles and every kernel function handle to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 1<<11, /**< Adds memory alloc node parameters to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 1<<12, /**< Adds memory free node parameters to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS = 1<<13, /**< Adds batch mem op node parameters to output */
- CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO = 1<<14, /**< Adds edge numbering information */
- CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS = 1<<15 /**< Adds conditional node parameters to output */
- } CUgraphDebugDot_flags;
- /**
- * Flags for user objects for graphs
- */
- typedef enum CUuserObject_flags_enum {
- CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1 /**< Indicates the destructor execution is not synchronized by any CUDA handle. */
- } CUuserObject_flags;
- /**
- * Flags for retaining user object references for graphs
- */
- typedef enum CUuserObjectRetain_flags_enum {
- CU_GRAPH_USER_OBJECT_MOVE = 1 /**< Transfer references from the caller rather than creating new references. */
- } CUuserObjectRetain_flags;
- /**
- * Flags for instantiating a graph
- */
- typedef enum CUgraphInstantiate_flags_enum {
- CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1 /**< Automatically free memory allocated in a graph before relaunching. */
- , CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD = 2 /**< Automatically upload the graph after instantiation. Only supported by
- ::cuGraphInstantiateWithParams. The upload will be performed using the
- stream provided in \p instantiateParams. */
- , CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH = 4 /**< Instantiate the graph to be launchable from the device. This flag can only
- be used on platforms which support unified addressing. This flag cannot be
- used in conjunction with CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH. */
- , CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY = 8 /**< Run the graph using the per-node priority attributes rather than the
- priority of the stream it is launched into. */
- } CUgraphInstantiate_flags;
- /**
- * CUDA device NUMA configuration: whether the GPU is a NUMA node.
- */
- typedef enum CUdeviceNumaConfig_enum {
- CU_DEVICE_NUMA_CONFIG_NONE = 0, /**< The GPU is not a NUMA node */
- CU_DEVICE_NUMA_CONFIG_NUMA_NODE, /**< (implicit value 1) The GPU is a NUMA node, ::CU_DEVICE_ATTRIBUTE_NUMA_ID contains its NUMA ID */
- } CUdeviceNumaConfig;
- /**
- * CUDA Process States
- *
- * Only the first enumerator has an explicit value; the remaining ones take
- * consecutive values (1, 2, 3) in declaration order.
- */
- typedef enum CUprocessState_enum {
- CU_PROCESS_STATE_RUNNING = 0, /**< Default process state */
- CU_PROCESS_STATE_LOCKED, /**< (implicit value 1) CUDA API locks are taken so further CUDA API calls will block */
- CU_PROCESS_STATE_CHECKPOINTED, /**< (implicit value 2) Application memory contents have been checkpointed and underlying allocations and device handles have been released */
- CU_PROCESS_STATE_FAILED, /**< (implicit value 3) Application entered an uncorrectable error during the checkpoint/restore process */
- } CUprocessState;
- /**
- * CUDA checkpoint optional lock arguments
- */
- typedef struct CUcheckpointLockArgs_st {
- unsigned int timeoutMs; /**< Timeout in milliseconds to attempt to lock the process, 0 indicates no timeout */
- unsigned int reserved0; /**< Reserved for future use, must be zero */
- cuuint64_t reserved1[7]; /**< Reserved for future use, must be zeroed */
- } CUcheckpointLockArgs;
- /**
- * CUDA checkpoint optional checkpoint arguments
- */
- typedef struct CUcheckpointCheckpointArgs_st {
- cuuint64_t reserved[8]; /**< Reserved for future use, must be zeroed */
- } CUcheckpointCheckpointArgs;
- /**
- * CUDA checkpoint optional restore arguments
- */
- typedef struct CUcheckpointRestoreArgs_st {
- cuuint64_t reserved[8]; /**< Reserved for future use, must be zeroed */
- } CUcheckpointRestoreArgs;
- /**
- * CUDA checkpoint optional unlock arguments
- */
- typedef struct CUcheckpointUnlockArgs_st {
- cuuint64_t reserved[8]; /**< Reserved for future use, must be zeroed */
- } CUcheckpointUnlockArgs;
- /**
- * Flags to specify for copies within a batch. For more details see ::cuMemcpyBatchAsync.
- */
- typedef enum CUmemcpyFlags_enum {
- CU_MEMCPY_FLAG_DEFAULT = 0x0, /**< Default: no flag bits set. */
- /**
- * Hint to the driver to try to overlap the copy with compute work on the SMs.
- */
- CU_MEMCPY_FLAG_PREFER_OVERLAP_WITH_COMPUTE = 0x1
- } CUmemcpyFlags;
- /**
- * These flags allow applications to convey the source access ordering CUDA must maintain.
- * The destination will always be accessed in stream order.
- */
- typedef enum CUmemcpySrcAccessOrder_enum {
- /**
- * Default value; not a valid access order.
- */
- CU_MEMCPY_SRC_ACCESS_ORDER_INVALID = 0x0,
- /**
- * Indicates that access to the source pointer must be in stream order.
- */
- CU_MEMCPY_SRC_ACCESS_ORDER_STREAM = 0x1,
- /**
- * Indicates that access to the source pointer can be out of stream order and
- * all accesses must be complete before the API call returns. This flag is suited for
- * ephemeral sources (e.g., stack variables) when it's known that no prior operations
- * in the stream can be accessing the memory and also that the lifetime of the memory
- * is limited to the scope that the source variable was declared in. Specifying
- * this flag allows the driver to optimize the copy and removes the need for the user
- * to synchronize the stream after the API call.
- */
- CU_MEMCPY_SRC_ACCESS_ORDER_DURING_API_CALL = 0x2,
- /**
- * Indicates that access to the source pointer can be out of stream order and the accesses
- * can happen even after the API call returns. This flag is suited for host pointers
- * allocated outside CUDA (e.g., via malloc) when it's known that no prior operations
- * in the stream can be accessing the memory. Specifying this flag allows the driver
- * to optimize the copy on certain platforms.
- */
- CU_MEMCPY_SRC_ACCESS_ORDER_ANY = 0x3,
- CU_MEMCPY_SRC_ACCESS_ORDER_MAX = 0x7FFFFFFF /**< Largest enumerator value. NOTE(review): looks like a range sentinel rather than a usable access order -- confirm. */
- } CUmemcpySrcAccessOrder;
- /**
- * Attributes specific to copies within a batch. For more details on usage see ::cuMemcpyBatchAsync.
- */
- typedef struct CUmemcpyAttributes_st {
- CUmemcpySrcAccessOrder srcAccessOrder; /**< Source access ordering to be observed for copies with this attribute. */
- CUmemLocation srcLocHint; /**< Hint location for the source operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
- CUmemLocation dstLocHint; /**< Hint location for the destination operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
- unsigned int flags; /**< Additional flags for copies with this attribute. See ::CUmemcpyFlags */
- } CUmemcpyAttributes_v1;
- typedef CUmemcpyAttributes_v1 CUmemcpyAttributes;
- /**
- * These flags allow applications to convey the operand type for individual copies specified in ::cuMemcpy3DBatchAsync.
- */
- typedef enum CUmemcpy3DOperandType_enum {
- CU_MEMCPY_OPERAND_TYPE_POINTER = 0x1, /**< Memcpy operand is a valid pointer. */
- CU_MEMCPY_OPERAND_TYPE_ARRAY = 0x2, /**< Memcpy operand is a CUarray. */
- CU_MEMCPY_OPERAND_TYPE_MAX = 0x7FFFFFFF /**< Largest enumerator value. NOTE(review): looks like a range sentinel rather than a usable operand type -- confirm. */
- } CUmemcpy3DOperandType;
- /**
- * Struct representing offset into a CUarray in elements
- */
- typedef struct CUoffset3D_st {
- size_t x; /**< Offset along x, in elements. */
- size_t y; /**< Offset along y, in elements. */
- size_t z; /**< Offset along z, in elements. */
- } CUoffset3D_v1;
- typedef CUoffset3D_v1 CUoffset3D;
- /**
- * Struct representing width/height/depth of a CUarray in elements
- */
- typedef struct CUextent3D_st {
- size_t width; /**< Width, in elements. */
- size_t height; /**< Height, in elements. */
- size_t depth; /**< Depth, in elements. */
- } CUextent3D_v1;
- typedef CUextent3D_v1 CUextent3D;
- /**
- * Struct representing an operand for copy with ::cuMemcpy3DBatchAsync
- *
- * The operand is a tagged union: \p type selects which member of \p op
- * describes the operand.
- */
- typedef struct CUmemcpy3DOperand_st {
- CUmemcpy3DOperandType type; /**< Operand kind; selects the member of \p op that applies. */
- union {
- /**
- * Struct representing an operand when ::CUmemcpy3DOperand::type is ::CU_MEMCPY_OPERAND_TYPE_POINTER
- */
- struct {
- CUdeviceptr ptr; /**< The pointer operand. */
- size_t rowLength; /**< Length of each row in elements. */
- size_t layerHeight; /**< Height of each layer in elements. */
- CUmemLocation locHint; /**< Hint location for the operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
- } ptr;
- /**
- * Struct representing an operand when ::CUmemcpy3DOperand::type is ::CU_MEMCPY_OPERAND_TYPE_ARRAY
- */
- struct {
- CUarray array; /**< The CUarray operand. */
- CUoffset3D offset; /**< Offset into \p array, in elements. */
- } array;
- } op; /**< Operand description; interpreted according to \p type. */
- } CUmemcpy3DOperand_v1;
- typedef CUmemcpy3DOperand_v1 CUmemcpy3DOperand;
- typedef struct CUDA_MEMCPY3D_BATCH_OP_st { /* Describes one 3D copy: source and destination operands, extent, source access ordering and flags. Presumably one entry of a ::cuMemcpy3DBatchAsync batch -- confirm. */
- CUmemcpy3DOperand src; /**< Source memcpy operand. */
- CUmemcpy3DOperand dst; /**< Destination memcpy operand. */
- CUextent3D extent; /**< Extents of the memcpy between src and dst. The width, height and depth components must not be 0. */
- CUmemcpySrcAccessOrder srcAccessOrder; /**< Source access ordering to be observed for copy from src to dst. */
- unsigned int flags; /**< Additional flags for this copy. See ::CUmemcpyFlags */
- } CUDA_MEMCPY3D_BATCH_OP_v1;
- typedef CUDA_MEMCPY3D_BATCH_OP_v1 CUDA_MEMCPY3D_BATCH_OP;
- /** @} */ /* END CUDA_TYPES */
- #if defined(__GNUC__) /* GCC-compatible compilers only */
- #if defined(__CUDA_API_PUSH_VISIBILITY_DEFAULT) /* opt-in: give the declarations that follow default symbol visibility */
- #pragma GCC visibility push(default)
- #endif /* __CUDA_API_PUSH_VISIBILITY_DEFAULT */
- #endif /* __GNUC__ */
- #ifdef _WIN32 /* CUDAAPI: calling-convention decoration applied to every driver API prototype */
- #define CUDAAPI __stdcall /* Windows: __stdcall */
- #else /* !_WIN32 */
- #define CUDAAPI /* elsewhere: expands to nothing */
- #endif /* _WIN32 */
- /**
- * \defgroup CUDA_ERROR Error Handling
- *
- * ___MANBRIEF___ error handling functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the error handling functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Gets the string description of an error code
- *
- * Sets \p *pStr to the address of a NULL-terminated string description
- * of the error code \p error.
- * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
- * will be returned and \p *pStr will be set to the NULL address.
- *
- * \param error - Error code to convert to string
- * \param pStr - Address of the string pointer.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::CUresult,
- * ::cudaGetErrorString
- */
- CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr);
- /**
- * \brief Gets the string representation of an error code enum name
- *
- * Sets \p *pStr to the address of a NULL-terminated string representation
- * of the name of the enum error code \p error.
- * If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE
- * will be returned and \p *pStr will be set to the NULL address.
- *
- * \param error - Error code to convert to string
- * \param pStr - Address of the string pointer.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::CUresult,
- * ::cudaGetErrorName
- */
- CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr);
- /** @} */ /* END CUDA_ERROR */
- /**
- * \defgroup CUDA_INITIALIZE Initialization
- *
- * ___MANBRIEF___ initialization functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the initialization functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Initialize the CUDA driver API
- *
- * Initializes the driver API and must be called before any other function from
- * the driver API in the current process. Currently, the \p Flags parameter must be 0. If ::cuInit()
- * has not been called, any function from the driver API will return
- * ::CUDA_ERROR_NOT_INITIALIZED.
- *
- * \param Flags - Initialization flag for CUDA.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_SYSTEM_DRIVER_MISMATCH,
- * ::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
- * \notefnerr
- */
- CUresult CUDAAPI cuInit(unsigned int Flags);
- /** @} */ /* END CUDA_INITIALIZE */
- /**
- * \defgroup CUDA_VERSION Version Management
- *
- * ___MANBRIEF___ version management functions of the low-level CUDA driver
- * API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the version management functions of the low-level
- * CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns the latest CUDA version supported by the driver
- *
- * Returns in \p *driverVersion the version of CUDA supported by
- * the driver. The version is returned as
- * (1000 × major + 10 × minor). For example, CUDA 9.2
- * would be represented by 9020.
- *
- * This function returns ::CUDA_ERROR_INVALID_VALUE if
- * \p driverVersion is NULL.
- *
- * \param driverVersion - Returns the CUDA driver version
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cudaDriverGetVersion,
- * ::cudaRuntimeGetVersion
- */
- CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
- /** @} */ /* END CUDA_VERSION */
- /**
- * \defgroup CUDA_DEVICE Device Management
- *
- * ___MANBRIEF___ device management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the device management functions of the low-level
- * CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns a handle to a compute device
- *
- * Returns in \p *device a device handle given an ordinal in the range <b>[0,
- * ::cuDeviceGetCount()-1]</b>.
- *
- * \param device - Returned device handle
- * \param ordinal - Device number to get handle for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGetLuid,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport
- */
- CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
- /**
- * \brief Returns the number of compute-capable devices
- *
- * Returns in \p *count the number of devices with compute capability greater
- * than or equal to 2.0 that are available for execution. If there is no such
- * device, ::cuDeviceGetCount() returns 0.
- *
- * \param count - Returned number of compute-capable devices
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGetLuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaGetDeviceCount
- */
- CUresult CUDAAPI cuDeviceGetCount(int *count);
- /**
- * \brief Returns an identifier string for the device
- *
- * Returns an ASCII string identifying the device \p dev in the NULL-terminated
- * string pointed to by \p name. \p len specifies the maximum length of the
- * string that may be returned.
- *
- * \param name - Returned identifier string for the device
- * \param len - Maximum length of string to store in \p name
- * \param dev - Device to get identifier string for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGetLuid,
- * ::cuDeviceGetCount,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaGetDeviceProperties
- */
- CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
- /**
- * \brief Return a UUID for the device
- *
- * Note there is a later version of this API, ::cuDeviceGetUuid_v2. It will
- * supplant this version in 12.0, which is retained for minor version compatibility.
- *
- * Returns 16 octets identifying the device \p dev in the structure
- * pointed to by \p uuid.
- *
- * \param uuid - Returned UUID
- * \param dev - Device to get the UUID for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetUuid_v2,
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetLuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaGetDeviceProperties
- */
- CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev);
- /**
- * \brief Return a UUID for the device (11.4+)
- *
- * Returns 16 octets identifying the device \p dev in the structure
- * pointed to by \p uuid. If the device is in MIG mode, returns its
- * MIG UUID which uniquely identifies the subscribed MIG compute instance.
- *
- * \param uuid - Returned UUID
- * \param dev - Device to get the UUID for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetLuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cudaGetDeviceProperties
- */
- CUresult CUDAAPI cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev);
- /**
- * \brief Return an LUID and device node mask for the device
- *
- * Returns identifying information (\p luid and \p deviceNodeMask) to allow
- * matching the device with graphics APIs.
- *
- * \param luid - Returned LUID
- * \param deviceNodeMask - Returned device node mask
- * \param dev - Device to get the LUID and device node mask for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaGetDeviceProperties
- */
- CUresult CUDAAPI cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, CUdevice dev);
- /**
- * \brief Returns the total amount of memory on the device
- *
- * Returns in \p *bytes the total amount of memory available on the device
- * \p dev in bytes.
- *
- * \param bytes - Returned memory available on device in bytes
- * \param dev - Device handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaMemGetInfo
- */
- CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
- /**
- * \brief Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size.
- *
- * Returns in \p *maxWidthInElements the maximum number of texture elements allocatable in a 1D linear texture
- * for the given \p format and \p numChannels.
- *
- * \param maxWidthInElements - Returned maximum number of texture elements allocatable for the given \p format and \p numChannels.
- * \param format - Texture format.
- * \param numChannels - Number of channels per texture element.
- * \param dev - Device handle.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cudaMemGetInfo,
- * ::cuDeviceTotalMem
- */
- CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev);
- /**
- * \brief Returns information about the device
- *
- * Returns in \p *pi the integer value of the attribute \p attrib on device
- * \p dev. The supported attributes are:
- * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
- * block;
- * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block
- * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block
- * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block
- * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid
- * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid
- * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid
- * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of
- * shared memory available to a thread block in bytes
- * - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device for
- * __constant__ variables in a CUDA C kernel in bytes
- * - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads
- * - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the
- * memory copy functions that involve memory regions allocated through
- * ::cuMemAllocPitch()
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D
- * texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width
- * for a 1D texture bound to linear memory
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum
- * mipmapped 1D texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D
- * texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D
- * texture height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width
- * for a 2D texture bound to linear memory
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height
- * for a 2D texture bound to linear memory
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch
- * in bytes for a 2D texture bound to linear memory
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum
- * mipmapped 2D texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum
- * mipmapped 2D texture height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D
- * texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D
- * texture height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D
- * texture depth
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE:
- * Alternate maximum 3D texture width, 0 if no alternate
- * maximum 3D texture size is supported
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE:
- * Alternate maximum 3D texture height, 0 if no alternate
- * maximum 3D texture size is supported
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE:
- * Alternate maximum 3D texture depth, 0 if no alternate
- * maximum 3D texture size is supported
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH:
- * Maximum cubemap texture width or height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH:
- * Maximum 1D layered texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS:
- * Maximum layers in a 1D layered texture
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH:
- * Maximum 2D layered texture width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT:
- * Maximum 2D layered texture height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS:
- * Maximum layers in a 2D layered texture
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH:
- * Maximum cubemap layered texture width or height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS:
- * Maximum layers in a cubemap layered texture
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH:
- * Maximum 1D surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH:
- * Maximum 2D surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT:
- * Maximum 2D surface height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH:
- * Maximum 3D surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT:
- * Maximum 3D surface height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH:
- * Maximum 3D surface depth
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH:
- * Maximum 1D layered surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS:
- * Maximum layers in a 1D layered surface
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH:
- * Maximum 2D layered surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT:
- * Maximum 2D layered surface height
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS:
- * Maximum layers in a 2D layered surface
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH:
- * Maximum cubemap surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH:
- * Maximum cubemap layered surface width
- * - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS:
- * Maximum layers in a cubemap layered surface
- * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit
- * registers available to a thread block
- * - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: The typical clock frequency in kilohertz
- * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; texture
- * base addresses aligned to ::textureAlign bytes do not need an offset
- * applied to texture fetches
- * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment requirement
- * for 2D texture references bound to pitched memory
- * - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy
- * memory between host and device while executing a kernel, or 0 if not
- * - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors on
- * the device
- * - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time limit
- * for kernels executed on the device, or 0 if not
- * - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with the
- * memory subsystem, or 0 if not
- * - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map host
- * memory into the CUDA address space, or 0 if not
- * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is currently
- * in. Available modes are as follows:
- * - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted and
- * can have multiple CUDA contexts present at a single time.
- * - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is
- * prohibited from creating new CUDA contexts.
- * - ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - Device
- * can have only one context used by a single process at a time.
- * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports
- * executing multiple kernels within the same context simultaneously, or 0 if
- * not. It is not guaranteed that multiple kernels will be resident
- * on the device concurrently so this feature should not be relied upon for
- * correctness.
- * - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on the
- * device, 0 if error correction is disabled or not supported by the device
- * - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device
- * - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot) identifier
- * of the device
- * - ::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: PCI domain identifier of the device
- * - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC driver. TCC
- * is only available on Tesla hardware running Windows Vista or later
- * - ::CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: Peak memory clock frequency in kilohertz
- * - ::CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width in bits
- * - ::CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: Size of L2 cache in bytes. 0 if the device doesn't have L2 cache
- * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident threads per multiprocessor
- * - ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: 1 if the device shares a unified address space with
- * the host, or 0 if not
- * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability version number
- * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability version number
- * - ::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: 1 if device supports caching globals
- * in L1 cache, 0 if caching globals in L1 cache is not supported by the device
- * - ::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: 1 if device supports caching locals
- * in L1 cache, 0 if caching locals in L1 cache is not supported by the device
- * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: Maximum amount of
- * shared memory available to a multiprocessor in bytes; this amount is shared
- * by all thread blocks simultaneously resident on a multiprocessor
- * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: Maximum number of 32-bit
- * registers available to a multiprocessor; this number is shared by all thread
- * blocks simultaneously resident on a multiprocessor
- * - ::CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: 1 if device supports allocating managed memory
- * on this system, 0 if allocating managed memory is not supported by the device on this system.
- * - ::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: 1 if device is on a multi-GPU board, 0 if not.
- * - ::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: Unique identifier for a group of devices
- * associated with the same board. Devices on the same multi-GPU board will share the same identifier.
- * - ::CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: 1 if Link between the device and the host
- * supports native atomic operations.
- * - ::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance
- * (in floating-point operations per second) to double precision performance.
- * - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device supports coherently accessing
- * pageable memory without calling cudaHostRegister on it.
- * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory
- * concurrently with the CPU.
- * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: Device supports Compute Preemption.
- * - ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered
- * memory at the same virtual address as the CPU.
- * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size
- * supported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() or cuKernelSetAttribute() call.
- * For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
- * - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES: Device accesses pageable memory via the host's
- * page tables.
- * - ::CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST: The host can directly access managed memory on the device without migration.
- * - ::CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED: Device supports virtual memory management APIs like ::cuMemAddressReserve, ::cuMemCreate, ::cuMemMap and related APIs
- * - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED: Device supports exporting memory to a posix file descriptor with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate
- * - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED: Device supports exporting memory to a Win32 NT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate
- * - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED: Device supports exporting memory to a Win32 KMT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate
- * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR: Maximum number of thread blocks that can reside on a multiprocessor
- * - ::CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED: Device supports compressible memory allocation via ::cuMemCreate
- * - ::CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE: Maximum L2 persisting lines capacity setting in bytes
- * - ::CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE: Maximum value of CUaccessPolicyWindow::num_bytes
- * - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED: Device supports specifying the GPUDirect RDMA flag with ::cuMemCreate.
- * - ::CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK: Amount of shared memory per block reserved by CUDA driver in bytes
- * - ::CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
- * - ::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED: Device supports using the ::cuMemHostRegister flag ::CU_MEMHOSTREGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU
- * - ::CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED: Device supports using the ::cuMemAllocAsync and ::cuMemPool family of APIs
- * - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
- * - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the ::CUflushGPUDirectRDMAWritesOptions enum
- * - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::CUGPUDirectRDMAWritesOrdering for the numerical values returned here.
- * - ::CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES: Bitmask of handle types supported with mempool based IPC
- * - ::CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.
- * - ::CU_DEVICE_ATTRIBUTE_NUMA_CONFIG: NUMA configuration of a device: value is of type ::CUdeviceNumaConfig enum
- * - ::CU_DEVICE_ATTRIBUTE_NUMA_ID: NUMA node ID of the GPU memory
- * - ::CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED: Device supports switch multicast and reduction operations.
- * - ::CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID: The combined 16-bit PCI device ID and 16-bit PCI vendor ID.
- * - ::CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID.
- *
- * \param pi - Returned device attribute value
- * \param attrib - Device attribute to query
- * \param dev - Device handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem,
- * ::cuDeviceGetExecAffinitySupport,
- * ::cudaDeviceGetAttribute,
- * ::cudaGetDeviceProperties
- */
- CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
- /**
- * \brief Return NvSciSync attributes that this device can support.
- *
- * Returns in \p nvSciSyncAttrList, the properties of NvSciSync that
- * this CUDA device, \p dev can support. The returned \p nvSciSyncAttrList
- * can be used to create an NvSciSync object that matches this device's capabilities.
- *
- * If NvSciSyncAttrKey_RequiredPerm field in \p nvSciSyncAttrList is
- * already set this API will return ::CUDA_ERROR_INVALID_VALUE.
- *
- * The applications should set \p nvSciSyncAttrList to a valid
- * NvSciSyncAttrList failing which this API will return
- * ::CUDA_ERROR_INVALID_HANDLE.
- *
- * The \p flags parameter controls how the application intends to use
- * the NvSciSync created from the \p nvSciSyncAttrList. The valid flags are:
- * - ::CUDA_NVSCISYNC_ATTR_SIGNAL, specifies that the application intends to
- * signal an NvSciSync on this CUDA device.
- * - ::CUDA_NVSCISYNC_ATTR_WAIT, specifies that the application intends to
- * wait on an NvSciSync on this CUDA device.
- *
- * At least one of these flags must be set, failing which the API
- * returns ::CUDA_ERROR_INVALID_VALUE. The two flags are orthogonal
- * to one another: a developer may set both flags, which allows both
- * wait-specific and signal-specific attributes to be set in the same \p nvSciSyncAttrList.
- *
- * Note that this API updates the input \p nvSciSyncAttrList with values equivalent
- * to the following public attribute key-values:
- * NvSciSyncAttrKey_RequiredPerm is set to
- * - NvSciSyncAccessPerm_SignalOnly if ::CUDA_NVSCISYNC_ATTR_SIGNAL is set in \p flags.
- * - NvSciSyncAccessPerm_WaitOnly if ::CUDA_NVSCISYNC_ATTR_WAIT is set in \p flags.
- * - NvSciSyncAccessPerm_WaitSignal if both ::CUDA_NVSCISYNC_ATTR_WAIT and
- * ::CUDA_NVSCISYNC_ATTR_SIGNAL are set in \p flags.
- * NvSciSyncAttrKey_PrimitiveInfo is set to
- * - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid \p dev.
- * - NvSciSyncAttrValPrimitiveType_Syncpoint if \p dev is a Tegra device.
- * - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if \p dev is GA10X+.
- * NvSciSyncAttrKey_GpuId is set to the same UUID that is returned for this
- * \p dev from ::cuDeviceGetUuid.
- *
- * \param nvSciSyncAttrList - Return NvSciSync attributes supported.
- * \param dev - Valid Cuda Device to get NvSciSync attributes for.
- * \param flags - flags describing NvSciSync usage.
- *
- * \return
- *
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa
- * ::cuImportExternalSemaphore,
- * ::cuDestroyExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, CUdevice dev, int flags);
- /**
- * \brief Sets the current memory pool of a device
- *
- * The memory pool must be local to the specified device.
- * ::cuMemAllocAsync allocates from the current mempool of the provided stream's device.
- * By default, a device's current memory pool is its default memory pool.
- *
- * \note Use ::cuMemAllocFromPoolAsync to specify asynchronous allocations from a device different
- * than the one the stream runs on.
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate, ::cuMemPoolDestroy, ::cuMemAllocFromPoolAsync
- */
- CUresult CUDAAPI cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool);
- /**
- * \brief Gets the current mempool for a device
- *
- * Returns the last pool provided to ::cuDeviceSetMemPool for this device
- * or the device's default memory pool if ::cuDeviceSetMemPool has never been called.
- * By default the current mempool is the default mempool for a device.
- * Otherwise the returned pool must have been set with ::cuDeviceSetMemPool.
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuDeviceGetDefaultMemPool, ::cuMemPoolCreate, ::cuDeviceSetMemPool
- */
- CUresult CUDAAPI cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev);
- /**
- * \brief Returns the default mempool of a device
- *
- * The default mempool of a device contains device memory from that device.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuMemAllocAsync, ::cuMemPoolTrimTo, ::cuMemPoolGetAttribute, ::cuMemPoolSetAttribute, ::cuMemPoolSetAccess, ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, CUdevice dev);
- /**
- * \brief Returns information about the execution affinity support of the device.
- *
- * Returns in \p *pi whether execution affinity type \p type is supported by device \p dev.
- * The supported types are:
- * - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: 1 if context with limited SMs is supported by the device,
- * or 0 if not;
- *
- * \param pi - 1 if the execution affinity type \p type is supported by the device, or 0 if not
- * \param type - Execution affinity type to query
- * \param dev - Device handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem
- */
- CUresult CUDAAPI cuDeviceGetExecAffinitySupport(int *pi, CUexecAffinityType type, CUdevice dev);
- /**
- * \brief Blocks until remote writes are visible to the specified scope
- *
- * Blocks until GPUDirect RDMA writes to the target context via mappings
- * created through APIs like nvidia_p2p_get_pages (see
- * https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are
- * visible to the specified scope.
- *
- * If the scope equals or lies within the scope indicated by
- * ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, the call
- * will be a no-op and can be safely omitted for performance. This can be
- * determined by comparing the numerical values between the two enums, with
- * smaller scopes having smaller values.
- *
- * On platforms that support GPUDirect RDMA writes via more than one path in
- * hardware (see ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE), the user should
- * consider those paths as belonging to separate ordering domains. Note that in
- * such cases CUDA driver will report both RDMA writes ordering and RDMA write
- * scope as ALL_DEVICES and a call to ::cuFlushGPUDirectRDMAWrites will be a no-op,
- * but when these multiple paths are used simultaneously, it is the user's
- * responsibility to ensure ordering by using mechanisms outside the scope of
- * CUDA.
- *
- * Users may query support for this API via
- * ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS.
- *
- * \param target - The target of the operation, see ::CUflushGPUDirectRDMAWritesTarget
- * \param scope - The scope of the operation, see ::CUflushGPUDirectRDMAWritesScope
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- */
- CUresult CUDAAPI cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope);
- /** @} */ /* END CUDA_DEVICE */
- /**
- * \defgroup CUDA_DEVICE_DEPRECATED Device Management [DEPRECATED]
- *
- * ___MANBRIEF___ deprecated device management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the device management functions of the low-level
- * CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns properties for a selected device
- *
- * \deprecated
- *
- * This function was deprecated as of CUDA 5.0 and replaced by ::cuDeviceGetAttribute().
- *
- * Returns in \p *prop the properties of device \p dev. The ::CUdevprop
- * structure is defined as:
- *
- * \code
- typedef struct CUdevprop_st {
- int maxThreadsPerBlock;
- int maxThreadsDim[3];
- int maxGridSize[3];
- int sharedMemPerBlock;
- int totalConstantMemory;
- int SIMDWidth;
- int memPitch;
- int regsPerBlock;
- int clockRate;
- int textureAlign;
- } CUdevprop;
- * \endcode
- * where:
- *
- * - ::maxThreadsPerBlock is the maximum number of threads per block;
- * - ::maxThreadsDim[3] is the maximum sizes of each dimension of a block;
- * - ::maxGridSize[3] is the maximum sizes of each dimension of a grid;
- * - ::sharedMemPerBlock is the total amount of shared memory available per
- * block in bytes;
- * - ::totalConstantMemory is the total amount of constant memory available on
- * the device in bytes;
- * - ::SIMDWidth is the warp size;
- * - ::memPitch is the maximum pitch allowed by the memory copy functions that
- * involve memory regions allocated through ::cuMemAllocPitch();
- * - ::regsPerBlock is the total number of registers available per block;
- * - ::clockRate is the clock frequency in kilohertz;
- * - ::textureAlign is the alignment requirement; texture base addresses that
- * are aligned to ::textureAlign bytes do not need an offset applied to
- * texture fetches.
- *
- * \param prop - Returned properties of device
- * \param dev - Device to get properties for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
- /**
- * \brief Returns the compute capability of the device
- *
- * \deprecated
- *
- * This function was deprecated as of CUDA 5.0 and its functionality superseded
- * by ::cuDeviceGetAttribute().
- *
- * Returns in \p *major and \p *minor the major and minor revision numbers that
- * define the compute capability of the device \p dev.
- *
- * \param major - Major revision number
- * \param minor - Minor revision number
- * \param dev - Device handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetCount,
- * ::cuDeviceGetName,
- * ::cuDeviceGetUuid,
- * ::cuDeviceGet,
- * ::cuDeviceTotalMem
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
- /** @} */ /* END CUDA_DEVICE_DEPRECATED */
- /**
- * \defgroup CUDA_PRIMARY_CTX Primary Context Management
- *
- * ___MANBRIEF___ primary context management functions of the low-level CUDA driver
- * API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the primary context management functions of the low-level
- * CUDA driver application programming interface.
- *
- * The primary context is unique per device and shared with the CUDA runtime API.
- * These functions allow integration with other libraries using CUDA.
- *
- * @{
- */
- /**
- * \brief Retain the primary context on the GPU
- *
- * Retains the primary context on the device.
- * Once the user successfully retains the primary context, the primary context
- * will be active and available to the user until the user releases it
- * with ::cuDevicePrimaryCtxRelease() or resets it with ::cuDevicePrimaryCtxReset().
- * Unlike ::cuCtxCreate() the newly retained context is not pushed onto the stack.
- *
- * Retaining the primary context for the first time will fail with ::CUDA_ERROR_UNKNOWN
- * if the compute mode of the device is ::CU_COMPUTEMODE_PROHIBITED. The function
- * ::cuDeviceGetAttribute() can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to
- * determine the compute mode of the device.
- * The <i>nvidia-smi</i> tool can be used to set the compute mode for
- * devices. Documentation for <i>nvidia-smi</i> can be obtained by passing a
- * -h option to it.
- *
- * Please note that the primary context always supports pinned allocations. Other
- * flags can be specified by ::cuDevicePrimaryCtxSetFlags().
- *
- * \param pctx - Returned context handle of the new context
- * \param dev - Device for which primary context is requested
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuDevicePrimaryCtxRelease,
- * ::cuDevicePrimaryCtxSetFlags,
- * ::cuCtxCreate,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev);
- /**
- * \brief Release the primary context on the GPU
- *
- * Releases the primary context interop on the device.
- * A retained context should always be released once the user is done using
- * it. The context is automatically reset once the last reference to it is
- * released. This behavior is different when the primary context was retained
- * by the CUDA runtime from CUDA 4.0 and earlier. In this case, the primary
- * context remains always active.
- *
- * Releasing a primary context that has not been previously retained will
- * fail with ::CUDA_ERROR_INVALID_CONTEXT.
- *
- * Please note that unlike ::cuCtxDestroy() this method does not pop the context
- * from stack in any circumstances.
- *
- * \param dev - Device which primary context is released
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuDevicePrimaryCtxRetain,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev);
- /**
- * \brief Set flags for the primary context
- *
- * Sets the flags for the primary context on the device, overwriting previously
- * set ones.
- *
- * The three LSBs of the \p flags parameter can be used to control how the OS
- * thread, which owns the CUDA context at the time of an API call, interacts
- * with the OS scheduler when waiting for results from the GPU. Only one of
- * the scheduling flags can be set when creating a context.
- *
- * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
- * results from the GPU. This can decrease latency when waiting for the GPU,
- * but may lower the performance of CPU threads if they are performing work in
- * parallel with the CUDA thread.
- *
- * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
- * results from the GPU. This can increase latency when waiting for the GPU,
- * but can increase the performance of CPU threads performing work in parallel
- * with the GPU.
- *
- * - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work.
- *
- * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work. <br>
- * <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was
- * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC.
- *
- * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
- * uses a heuristic based on the number of active CUDA contexts in the
- * process \e C and the number of logical processors in the system \e P. If
- * \e C > \e P, then CUDA will yield to other OS threads when waiting for
- * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while
- * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN).
- * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on
- * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC
- * for low-powered devices.
- *
- * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
- * after resizing local memory for a kernel. This can prevent thrashing by
- * local memory allocations when launching many kernels with high local
- * memory usage at the cost of potentially increased memory usage. <br>
- * <b>Deprecated:</b> This flag is deprecated and the behavior enabled
- * by this flag is now the default and cannot be disabled.
- *
- * - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally
- * with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can
- * be set during context creation to instruct CUDA to create a coredump if
- * this context raises an exception during execution. These environment variables
- * are described in the CUDA-GDB user guide under the "GPU core dump support"
- * section.
- * The initial settings will be taken from the global settings at the time of
- * context creation. The other settings that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- *
- * - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not
- * been enabled globally with ::cuCoredumpSetAttributeGlobal or environment
- * variables, this flag can be set during context creation to instruct CUDA to
- * create a coredump if data is written to a certain pipe that is present in the
- * OS space. These environment variables are described in the CUDA-GDB user
- * guide under the "GPU core dump support" section.
- * It is important to note that the pipe name *must* be set with
- * ::cuCoredumpSetAttributeGlobal before creating the context if this flag is
- * used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set.
- * The initial settings will be taken from the global settings at the time of
- * context creation. The other settings that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- *
- * - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated
- * on this context will always synchronize. See further documentation in the
- * section titled "API Synchronization behavior" to learn more about cases when
- * synchronous memory operations can exhibit asynchronous behavior.
- *
- * \param dev - Device for which the primary context flags are set
- * \param flags - New flags for the device
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuDevicePrimaryCtxRetain,
- * ::cuDevicePrimaryCtxGetState,
- * ::cuCtxCreate,
- * ::cuCtxGetFlags,
- * ::cuCtxSetFlags,
- * ::cudaSetDeviceFlags
- */
- CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
- /**
- * \brief Get the state of the primary context
- *
- * Returns in \p *flags the flags for the primary context of \p dev, and in
- * \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag
- * values.
- *
- * \param dev - Device to get primary context flags for
- * \param flags - Pointer to store flags
- * \param active - Pointer to store context state; 0 = inactive, 1 = active
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuDevicePrimaryCtxSetFlags,
- * ::cuCtxGetFlags,
- * ::cuCtxSetFlags,
- * ::cudaGetDeviceFlags
- */
- CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, int *active);
- /**
- * \brief Destroy all allocations and reset all state on the primary context
- *
- * Explicitly destroys and cleans up all resources associated with the current
- * device in the current process.
- *
- * Note that it is the responsibility of the calling function to ensure that no
- * other module in the process is using the device any more. For that reason
- * it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases.
- * However it is safe for other modules to call ::cuDevicePrimaryCtxRelease()
- * even after resetting the device.
- * Resetting the primary context does not release it; an application that has
- * retained the primary context should explicitly release its usage.
- *
- * \param dev - Device for which primary context is destroyed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE
- * \notefnerr
- *
- * \sa ::cuDevicePrimaryCtxRetain,
- * ::cuDevicePrimaryCtxRelease,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cudaDeviceReset
- */
- CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev);
- /** @} */ /* END CUDA_PRIMARY_CTX */
- /**
- * \defgroup CUDA_CTX Context Management
- *
- * ___MANBRIEF___ context management functions of the low-level CUDA driver
- * API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the context management functions of the low-level
- * CUDA driver application programming interface.
- *
- * Please note that some functions are described in
- * \ref CUDA_PRIMARY_CTX "Primary Context Management" section.
- *
- * @{
- */
- /**
- * \brief Create a CUDA context
- *
- * \note In most cases it is recommended to use ::cuDevicePrimaryCtxRetain.
- *
- * Creates a new CUDA context and associates it with the calling thread. The
- * \p flags parameter is described below. The context is created with a usage
- * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy()
- * when done using the context. If a context is already current to the thread,
- * it is supplanted by the newly created context and may be restored by a subsequent
- * call to ::cuCtxPopCurrent().
- *
- * The three LSBs of the \p flags parameter can be used to control how the OS
- * thread, which owns the CUDA context at the time of an API call, interacts
- * with the OS scheduler when waiting for results from the GPU. Only one of
- * the scheduling flags can be set when creating a context.
- *
- * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
- * results from the GPU. This can decrease latency when waiting for the GPU,
- * but may lower the performance of CPU threads if they are performing work in
- * parallel with the CUDA thread.
- *
- * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
- * results from the GPU. This can increase latency when waiting for the GPU,
- * but can increase the performance of CPU threads performing work in parallel
- * with the GPU.
- *
- * - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work.
- *
- * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work. <br>
- * <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was
- * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC.
- *
- * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
- * uses a heuristic based on the number of active CUDA contexts in the
- * process \e C and the number of logical processors in the system \e P. If
- * \e C > \e P, then CUDA will yield to other OS threads when waiting for
- * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while
- * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN).
- * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on
- * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC
- * for low-powered devices.
- *
- * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations.
- * This flag must be set in order to allocate pinned host memory that is
- * accessible to the GPU.
- *
- * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
- * after resizing local memory for a kernel. This can prevent thrashing by
- * local memory allocations when launching many kernels with high local
- * memory usage at the cost of potentially increased memory usage. <br>
- * <b>Deprecated:</b> This flag is deprecated and the behavior enabled
- * by this flag is now the default and cannot be disabled.
- * Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit().
- *
- * - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally
- * with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can
- * be set during context creation to instruct CUDA to create a coredump if
- * this context raises an exception during execution. These environment variables
- * are described in the CUDA-GDB user guide under the "GPU core dump support"
- * section.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- *
- * - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not
- * been enabled globally with ::cuCoredumpSetAttributeGlobal or environment
- * variables, this flag can be set during context creation to instruct CUDA to
- * create a coredump if data is written to a certain pipe that is present in the
- * OS space. These environment variables are described in the CUDA-GDB user
- * guide under the "GPU core dump support" section.
- * It is important to note that the pipe name *must* be set with
- * ::cuCoredumpSetAttributeGlobal before creating the context if this flag is
- * used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- * Setting this flag on any context creation is equivalent to setting the
- * ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally.
- *
- * - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated
- * on this context will always synchronize. See further documentation in the
- * section titled "API Synchronization behavior" to learn more about cases when
- * synchronous memory operations can exhibit asynchronous behavior.
- *
- * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of
- * the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute()
- * can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the
- * compute mode of the device. The <i>nvidia-smi</i> tool can be used to set
- * the compute mode for devices.
- * Documentation for <i>nvidia-smi</i> can be obtained by passing a
- * -h option to it.
- *
- * \param pctx - Returned context handle of the new context
- * \param flags - Context creation flags
- * \param dev - Device to create context on
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCoredumpSetAttributeGlobal,
- * ::cuCoredumpSetAttribute,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
- /**
- * \brief Create a CUDA context with execution affinity
- *
- * Creates a new CUDA context with execution affinity and associates it with
- * the calling thread. The \p paramsArray and \p flags parameters are described below.
- * The context is created with a usage count of 1 and the caller of ::cuCtxCreate() must
- * call ::cuCtxDestroy() when done using the context. If a context is already
- * current to the thread, it is supplanted by the newly created context and may
- * be restored by a subsequent call to ::cuCtxPopCurrent().
- *
- * The type and the amount of execution resource the context can use is limited by \p paramsArray
- * and \p numParams. The \p paramsArray is an array of \p CUexecAffinityParam and the \p numParams
- * describes the size of the array. If two \p CUexecAffinityParam in the array have the same type,
- * the latter execution affinity parameter overrides the former execution affinity parameter.
- * The supported execution affinity types are:
- * - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use. The portion
- * of SMs is specified as the number of SMs via \p CUexecAffinitySmCount. This limit will be internally
- * rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution
- * affinity of the context via \p cuCtxGetExecAffinity after context creation. Currently, this attribute
- * is only supported under Volta+ MPS.
- *
- * The three LSBs of the \p flags parameter can be used to control how the OS
- * thread, which owns the CUDA context at the time of an API call, interacts
- * with the OS scheduler when waiting for results from the GPU. Only one of
- * the scheduling flags can be set when creating a context.
- *
- * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
- * results from the GPU. This can decrease latency when waiting for the GPU,
- * but may lower the performance of CPU threads if they are performing work in
- * parallel with the CUDA thread.
- *
- * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
- * results from the GPU. This can increase latency when waiting for the GPU,
- * but can increase the performance of CPU threads performing work in parallel
- * with the GPU.
- *
- * - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work.
- *
- * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work. <br>
- * <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was
- * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC.
- *
- * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
- * uses a heuristic based on the number of active CUDA contexts in the
- * process \e C and the number of logical processors in the system \e P. If
- * \e C > \e P, then CUDA will yield to other OS threads when waiting for
- * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while
- * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN).
- * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on
- * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC
- * for low-powered devices.
- *
- * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations.
- * This flag must be set in order to allocate pinned host memory that is
- * accessible to the GPU.
- *
- * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
- * after resizing local memory for a kernel. This can prevent thrashing by
- * local memory allocations when launching many kernels with high local
- * memory usage at the cost of potentially increased memory usage. <br>
- * <b>Deprecated:</b> This flag is deprecated and the behavior enabled
- * by this flag is now the default and cannot be disabled.
- * Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit().
- *
- * - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally
- * with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can
- * be set during context creation to instruct CUDA to create a coredump if
- * this context raises an exception during execution. These environment variables
- * are described in the CUDA-GDB user guide under the "GPU core dump support"
- * section.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- *
- * - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not
- * been enabled globally with ::cuCoredumpSetAttributeGlobal or environment
- * variables, this flag can be set during context creation to instruct CUDA to
- * create a coredump if data is written to a certain pipe that is present in the
- * OS space. These environment variables are described in the CUDA-GDB user
- * guide under the "GPU core dump support" section.
- * It is important to note that the pipe name *must* be set with
- * ::cuCoredumpSetAttributeGlobal before creating the context if this flag is
- * used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- * Setting this flag on any context creation is equivalent to setting the
- * ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally.
- *
- * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of
- * the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute()
- * can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the
- * compute mode of the device. The <i>nvidia-smi</i> tool can be used to set
- * the compute mode for devices.
- * Documentation for <i>nvidia-smi</i> can be obtained by passing a
- * -h option to it.
- *
- * \param pctx - Returned context handle of the new context
- * \param paramsArray - Execution affinity parameters
- * \param numParams - Number of execution affinity parameters
- * \param flags - Context creation flags
- * \param dev - Device to create context on
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cuCoredumpSetAttributeGlobal,
- * ::cuCoredumpSetAttribute,
- * ::CUexecAffinityParam
- */
- CUresult CUDAAPI cuCtxCreate_v3(CUcontext *pctx, CUexecAffinityParam *paramsArray, int numParams, unsigned int flags, CUdevice dev);
- /**
- * \brief Create a CUDA context
- *
- * Creates a new CUDA context and associates it with the calling thread. The
- * \p flags parameter is described below. The context is created with a usage
- * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy()
- * when done using the context. If a context is already current to the thread,
- * it is supplanted by the newly created context and may be restored by a subsequent
- * call to ::cuCtxPopCurrent().
- *
- * CUDA context can be created with execution affinity. The type and the amount of
- * execution resource the context can use is limited by \p paramsArray and \p numExecAffinityParams
- * in \p execAffinity. The \p paramsArray is an array of \p CUexecAffinityParam and the \p numExecAffinityParams
- * describes the size of the paramsArray. If two \p CUexecAffinityParam in the array have the same type,
- * the latter execution affinity parameter overrides the former execution affinity parameter.
- * The supported execution affinity types are:
- * - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use. The portion
- * of SMs is specified as the number of SMs via \p CUexecAffinitySmCount. This limit will be internally
- * rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution
- * affinity of the context via \p cuCtxGetExecAffinity after context creation. Currently, this attribute
- * is only supported under Volta+ MPS.
- *
- * A CUDA context can be created in CIG (CUDA in Graphics) mode by setting \p cigParams.
- * Data from the graphics client is shared with CUDA via the \p sharedData in \p cigParams.
- * Support for a D3D12 graphics client can be determined using ::cuDeviceGetAttribute() with
- * ::CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED. \p sharedData is an ID3D12CommandQueue handle.
- * Either \p execAffinityParams or \p cigParams can be set to a non-null value, but not both.
- * Setting both to a non-null value will result in undefined behavior.
- *
- * The three LSBs of the \p flags parameter can be used to control how the OS
- * thread, which owns the CUDA context at the time of an API call, interacts
- * with the OS scheduler when waiting for results from the GPU. Only one of
- * the scheduling flags can be set when creating a context.
- *
- * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
- * results from the GPU. This can decrease latency when waiting for the GPU,
- * but may lower the performance of CPU threads if they are performing work in
- * parallel with the CUDA thread.
- *
- * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
- * results from the GPU. This can increase latency when waiting for the GPU,
- * but can increase the performance of CPU threads performing work in parallel
- * with the GPU.
- *
- * - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work.
- *
- * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
- * synchronization primitive when waiting for the GPU to finish work. <br>
- * <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was
- * replaced with ::CU_CTX_SCHED_BLOCKING_SYNC.
- *
- * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
- * uses a heuristic based on the number of active CUDA contexts in the
- * process \e C and the number of logical processors in the system \e P. If
- * \e C > \e P, then CUDA will yield to other OS threads when waiting for
- * the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while
- * waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN).
- * Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on
- * the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC
- * for low-powered devices.
- *
- * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations.
- * This flag must be set in order to allocate pinned host memory that is
- * accessible to the GPU.
- *
- * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
- * after resizing local memory for a kernel. This can prevent thrashing by
- * local memory allocations when launching many kernels with high local
- * memory usage at the cost of potentially increased memory usage. <br>
- * <b>Deprecated:</b> This flag is deprecated and the behavior enabled
- * by this flag is now the default and cannot be disabled.
- * Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit().
- *
- * - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally
- * with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can
- * be set during context creation to instruct CUDA to create a coredump if
- * this context raises an exception during execution. These environment variables
- * are described in the CUDA-GDB user guide under the "GPU core dump support"
- * section.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current. This flag is not supported when the CUDA context is created in
- * CIG (CUDA in Graphics) mode.
- *
- * - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not
- * been enabled globally with ::cuCoredumpSetAttributeGlobal or environment
- * variables, this flag can be set during context creation to instruct CUDA to
- * create a coredump if data is written to a certain pipe that is present in the
- * OS space. These environment variables are described in the CUDA-GDB user
- * guide under the "GPU core dump support" section.
- * It is important to note that the pipe name *must* be set with
- * ::cuCoredumpSetAttributeGlobal before creating the context if this flag is
- * used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set.
- * The initial attributes will be taken from the global attributes at the time of
- * context creation. The other attributes that control coredump output can be
- * modified by calling ::cuCoredumpSetAttribute from the created context after
- * it becomes current.
- * Setting this flag on any context creation is equivalent to setting the
- * ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally.
- * This flag is not supported when the CUDA context is created in
- * CIG (CUDA in Graphics) mode.
- *
- * - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated
- * on this context will always synchronize. See further documentation in the
- * section titled "API Synchronization behavior" to learn more about cases when
- * synchronous memory operations can exhibit asynchronous behavior.
- *
- * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of
- * the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute()
- * can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the
- * compute mode of the device. The <i>nvidia-smi</i> tool can be used to set
- * the compute mode for devices.
- * Documentation for <i>nvidia-smi</i> can be obtained by passing a
- * -h option to it.
- *
- * Context creation will fail with ::CUDA_ERROR_INVALID_VALUE if an invalid parameter was
- * passed by the client to create the CUDA context.
- *
- * Context creation in CIG mode will fail with ::CUDA_ERROR_NOT_SUPPORTED if CIG is not supported
- * by the device or the driver.
- *
- * \param pctx - Returned context handle of the new context
- * \param ctxCreateParams - Context creation parameters
- * \param flags - Context creation flags
- * \param dev - Device to create context on
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCoredumpSetAttributeGlobal,
- * ::cuCoredumpSetAttribute,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxCreate_v4(CUcontext *pctx, CUctxCreateParams *ctxCreateParams, unsigned int flags, CUdevice dev);
- /**
- * \brief Destroy a CUDA context
- *
- * Destroys the CUDA context specified by \p ctx. The context \p ctx will be
- * destroyed regardless of how many threads it is current to.
- * It is the responsibility of the calling function to ensure that no API
- * call is issued using \p ctx while ::cuCtxDestroy() is executing.
- *
- * Destroys and cleans up all resources associated with the context.
- * It is the caller's responsibility to ensure that the context or its resources
- * are not accessed or passed in subsequent API calls and doing so will result in undefined behavior.
- * These resources include CUDA types ::CUmodule, ::CUfunction, ::CUstream, ::CUevent,
- * ::CUarray, ::CUmipmappedArray, ::CUtexObject, ::CUsurfObject, ::CUtexref, ::CUsurfref,
- * ::CUgraphicsResource, ::CUlinkState, ::CUexternalMemory and ::CUexternalSemaphore.
- * These resources also include memory allocations by ::cuMemAlloc(), ::cuMemAllocHost(),
- * ::cuMemAllocManaged() and ::cuMemAllocPitch().
- *
- * If \p ctx is current to the calling thread then \p ctx will also be
- * popped from the current thread's context stack (as though ::cuCtxPopCurrent()
- * were called). If \p ctx is current to other threads, then \p ctx will
- * remain current to those threads, and attempting to access \p ctx from
- * those threads will result in the error ::CUDA_ERROR_CONTEXT_IS_DESTROYED.
- *
- * \note ::cuCtxDestroy() will not destroy memory allocations by ::cuMemCreate(), ::cuMemAllocAsync() and
- * ::cuMemAllocFromPoolAsync(). These memory allocations are not associated with any CUDA context and need to
- * be destroyed explicitly.
- *
- * \param ctx - Context to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxDestroy(CUcontext ctx);
- /**
- * \brief Pushes a context on the current CPU thread
- *
- * Pushes the given context \p ctx onto the CPU thread's stack of current
- * contexts. The specified context becomes the CPU thread's current context, so
- * all CUDA functions that operate on the current context are affected.
- *
- * The previous current context may be made current again by calling
- * ::cuCtxDestroy() or ::cuCtxPopCurrent().
- *
- * \param ctx - Context to push
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx);
- /**
- * \brief Pops the current CUDA context from the current CPU thread.
- *
- * Pops the current CUDA context from the CPU thread and passes back the
- * old context handle in \p *pctx. That context may then be made current
- * to a different CPU thread by calling ::cuCtxPushCurrent().
- *
- * If a context was current to the CPU thread before ::cuCtxCreate() or
- * ::cuCtxPushCurrent() was called, this function makes that context current to
- * the CPU thread again.
- *
- * \param pctx - Returned popped context handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx);
- /**
- * \brief Binds the specified CUDA context to the calling CPU thread
- *
- * Binds the specified CUDA context to the calling CPU thread.
- * If \p ctx is NULL then the CUDA context previously bound to the
- * calling CPU thread is unbound and ::CUDA_SUCCESS is returned.
- *
- * If there exists a CUDA context stack on the calling CPU thread, this
- * will replace the top of that stack with \p ctx.
- * If \p ctx is NULL then this will be equivalent to popping the top
- * of the calling CPU thread's CUDA context stack (or a no-op if the
- * calling CPU thread's CUDA context stack is empty).
- *
- * \param ctx - Context to bind to the calling CPU thread
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa
- * ::cuCtxGetCurrent,
- * ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cudaSetDevice
- */
- CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx);
- /**
- * \brief Returns the CUDA context bound to the calling CPU thread.
- *
- * Returns in \p *pctx the CUDA context bound to the calling CPU thread.
- * If no context is bound to the calling CPU thread then \p *pctx is
- * set to NULL and ::CUDA_SUCCESS is returned.
- *
- * \param pctx - Returned context handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED
- * \notefnerr
- *
- * \sa
- * ::cuCtxSetCurrent,
- * ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cudaGetDevice
- */
- CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx);
- /**
- * \brief Returns the device handle for the current context
- *
- * Returns in \p *device the handle of the current context's device.
- *
- * \param device - Returned device handle for the current context
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cudaGetDevice
- */
- CUresult CUDAAPI cuCtxGetDevice(CUdevice *device);
- /**
- * \brief Returns the flags for the current context
- *
- * Returns in \p *flags the flags of the current context. See ::cuCtxCreate
- * for flag values.
- *
- * \param flags - Pointer to store flags of current context
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetCurrent,
- * ::cuCtxGetDevice,
- * ::cuCtxGetLimit,
- * ::cuCtxGetSharedMemConfig,
- * ::cuCtxGetStreamPriorityRange,
- * ::cuCtxSetFlags,
- * ::cudaGetDeviceFlags
- */
- CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags);
- /**
- * \brief Sets the flags for the current context
- *
- * Sets the flags for the current context overwriting previously set ones. See
- * ::cuDevicePrimaryCtxSetFlags for flag values.
- *
- * \param flags - Flags to set on the current context
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetCurrent,
- * ::cuCtxGetDevice,
- * ::cuCtxGetLimit,
- * ::cuCtxGetSharedMemConfig,
- * ::cuCtxGetStreamPriorityRange,
- * ::cuCtxGetFlags,
- * ::cudaGetDeviceFlags,
- * ::cuDevicePrimaryCtxSetFlags
- */
- CUresult CUDAAPI cuCtxSetFlags(unsigned int flags);
- /**
- * \brief Returns the unique Id associated with the context supplied
- *
- * Returns in \p ctxId the unique Id which is associated with a given context.
- * The Id is unique for the life of the program for this instance of CUDA.
- * If context is supplied as NULL and there is one current, the Id of the
- * current context is returned.
- *
- * \param ctx - Context for which to obtain the Id
- * \param ctxId - Pointer to store the Id of the context
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPushCurrent
- */
- CUresult CUDAAPI cuCtxGetId(CUcontext ctx, unsigned long long *ctxId);
- /**
- * \brief Block for the current context's tasks to complete
- *
- * Blocks until the current context has completed all preceding requested tasks.
- * If the current context is the primary context, green contexts that have been
- * created will also be synchronized.
- * ::cuCtxSynchronize() returns an error if one of the preceding tasks failed.
- * If the context was created with the ::CU_CTX_SCHED_BLOCKING_SYNC flag, the
- * CPU thread will block until the GPU context has finished its work.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cudaDeviceSynchronize
- */
- CUresult CUDAAPI cuCtxSynchronize(void);
- /**
- * \brief Set resource limits
- *
- * Setting \p limit to \p value is a request by the application to update
- * the current limit maintained by the context. The driver is free to
- * modify the requested value to meet h/w requirements (this could be
- * clamping to minimum or maximum values, rounding up to nearest element
- * size, etc). The application can use ::cuCtxGetLimit() to find out exactly
- * what the limit has been set to.
- *
- * Setting each ::CUlimit has its own specific restrictions, so each is
- * discussed here.
- *
- * - ::CU_LIMIT_STACK_SIZE controls the stack size in bytes of each GPU thread.
- * The driver automatically increases the per-thread stack size
- * for each kernel launch as needed. This size isn't reset back to the
- * original value after each launch. Setting this value will take effect
- * immediately, and if necessary, the device will block until all preceding
- * requested tasks are complete.
- *
- * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size in bytes of the FIFO used
- * by the ::printf() device system call. Setting ::CU_LIMIT_PRINTF_FIFO_SIZE
- * must be performed before launching any kernel that uses the ::printf()
- * device system call, otherwise ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size in bytes of the heap used
- * by the ::malloc() and ::free() device system calls. Setting
- * ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching any kernel
- * that uses the ::malloc() or ::free() device system calls, otherwise
- * ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * - ::CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH controls the maximum nesting depth of
- * a grid at which a thread can safely call ::cudaDeviceSynchronize(). Setting
- * this limit must be performed before any launch of a kernel that uses the
- * device runtime and calls ::cudaDeviceSynchronize() above the default sync
- * depth, two levels of grids. Calls to ::cudaDeviceSynchronize() will fail
- * with error code ::cudaErrorSyncDepthExceeded if the limitation is
- * violated. This limit can be set smaller than the default or up to the maximum
- * launch depth of 24. When setting this limit, keep in mind that additional
- * levels of sync depth require the driver to reserve large amounts of device
- * memory which can no longer be used for user allocations. If these
- * reservations of device memory fail, ::cuCtxSetLimit() will return
- * ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
- * This limit is only applicable to devices of compute capability < 9.0.
- * Attempting to set this limit on devices of other compute capability
- * versions will result in the error ::CUDA_ERROR_UNSUPPORTED_LIMIT being
- * returned.
- *
- * - ::CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number of
- * outstanding device runtime launches that can be made from the current
- * context. A grid is outstanding from the point of launch up until the grid
- * is known to have been completed. Device runtime launches which violate
- * this limitation fail and return ::cudaErrorLaunchPendingCountExceeded when
- * ::cudaGetLastError() is called after launch. If more pending launches than
- * the default (2048 launches) are needed for a module using the device
- * runtime, this limit can be increased. Keep in mind that being able to
- * sustain additional pending launches will require the driver to reserve
- * larger amounts of device memory upfront which can no longer be used for
- * allocations. If these reservations fail, ::cuCtxSetLimit() will return
- * ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
- * This limit is only applicable to devices of compute capability 3.5 and
- * higher. Attempting to set this limit on devices of compute capability less
- * than 3.5 will result in the error ::CUDA_ERROR_UNSUPPORTED_LIMIT being
- * returned.
- *
- * - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity.
- * Values can range from 0B to 128B. This is purely a performance hint and
- * it can be ignored or clamped depending on the platform.
- *
- * - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE controls the size in bytes available for
- * persisting L2 cache. This is purely a performance hint and it can be
- * ignored or clamped depending on the platform.
- *
- * \param limit - Limit to set
- * \param value - Size of limit
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNSUPPORTED_LIMIT,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSynchronize,
- * ::cudaDeviceSetLimit
- */
- CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value);
- /**
- * \brief Returns resource limits
- *
- * Returns in \p *pvalue the current size of \p limit. The supported
- * ::CUlimit values are:
- * - ::CU_LIMIT_STACK_SIZE: stack size in bytes of each GPU thread.
- * - ::CU_LIMIT_PRINTF_FIFO_SIZE: size in bytes of the FIFO used by the
- * ::printf() device system call.
- * - ::CU_LIMIT_MALLOC_HEAP_SIZE: size in bytes of the heap used by the
- * ::malloc() and ::free() device system calls.
- * - ::CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: maximum grid depth at which a thread
- * can issue the device runtime call ::cudaDeviceSynchronize() to wait on
- * child grid launches to complete.
- * - ::CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of outstanding
- * device runtime launches that can be made from this context.
- * - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY: L2 cache fetch granularity.
- * - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE: Persisting L2 cache size in bytes
- *
- * \param limit - Limit to query
- * \param pvalue - Returned size of limit
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNSUPPORTED_LIMIT
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cudaDeviceGetLimit
- */
- CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
- /**
- * \brief Returns the preferred cache configuration for the current context.
- *
- * On devices where the L1 cache and shared memory use the same hardware
- * resources, this function returns through \p pconfig the preferred cache configuration
- * for the current context. This is only a preference. The driver will use
- * the requested configuration if possible, but it is free to choose a different
- * configuration if required to execute functions.
- *
- * This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices
- * where the size of the L1 cache and shared memory are fixed.
- *
- * The supported cache configurations are:
- * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
- * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
- * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
- * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
- *
- * \param pconfig - Returned cache configuration
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cuFuncSetCacheConfig,
- * ::cudaDeviceGetCacheConfig
- */
- CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
- /**
- * \brief Sets the preferred cache configuration for the current context.
- *
- * On devices where the L1 cache and shared memory use the same hardware
- * resources, this sets through \p config the preferred cache configuration for
- * the current context. This is only a preference. The driver will use
- * the requested configuration if possible, but it is free to choose a different
- * configuration if required to execute the function. Any function preference
- * set via ::cuFuncSetCacheConfig() or ::cuKernelSetCacheConfig() will be preferred over this context-wide
- * setting. Setting the context-wide cache configuration to
- * ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to prefer
- * to not change the cache configuration unless required to launch the kernel.
- *
- * This setting does nothing on devices where the size of the L1 cache and
- * shared memory are fixed.
- *
- * Launching a kernel with a different preference than the most recent
- * preference setting may insert a device-side synchronization point.
- *
- * The supported cache configurations are:
- * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
- * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
- * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
- * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
- *
- * \param config - Requested cache configuration
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cuFuncSetCacheConfig,
- * ::cudaDeviceSetCacheConfig,
- * ::cuKernelSetCacheConfig
- */
- CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config);
- /**
- * \brief Gets the context's API version.
- *
- * Returns a version number in \p version corresponding to the capabilities of
- * the context (e.g. 3010 or 3020), which library developers can use to direct
- * callers to a specific API version. If \p ctx is NULL, returns the API version
- * used to create the currently bound context.
- *
- * Note that new API versions are only introduced when context capabilities are
- * changed that break binary compatibility, so the API version and driver version
- * may be different. For example, it is valid for the API version to be 3020 while
- * the driver version is 4020.
- *
- * \param ctx - Context to check
- * \param version - Pointer to version
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
- /**
- * \brief Returns numerical values that correspond to the least and
- * greatest stream priorities.
- *
- * Returns in \p *leastPriority and \p *greatestPriority the numerical values that correspond
- * to the least and greatest stream priorities respectively. Stream priorities
- * follow a convention where lower numbers imply greater priorities. The range of
- * meaningful stream priorities is given by [\p *greatestPriority, \p *leastPriority].
- * If the user attempts to create a stream with a priority value that is
- * outside the meaningful range as specified by this API, the priority is
- * automatically clamped down or up to either \p *leastPriority or \p *greatestPriority
- * respectively. See ::cuStreamCreateWithPriority for details on creating a
- * priority stream.
- * NULL may be passed in for \p leastPriority or \p greatestPriority if the
- * corresponding value is not desired.
- *
- * This function will return '0' in both \p *leastPriority and \p *greatestPriority if
- * the current context's device does not support stream priorities
- * (see ::cuDeviceGetAttribute).
- *
- * \param leastPriority - Pointer to an int in which the numerical value for least
- * stream priority is returned
- * \param greatestPriority - Pointer to an int in which the numerical value for greatest
- * stream priority is returned
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuStreamCreateWithPriority,
- * ::cuStreamGetPriority,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cudaDeviceGetStreamPriorityRange
- */
- CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, int *greatestPriority);
- /**
- * \brief Resets all persisting lines in cache to normal status.
- *
- * ::cuCtxResetPersistingL2Cache resets all persisting lines in cache to normal
- * status. Takes effect on function return.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuCtxResetPersistingL2Cache(void);
- /**
- * \brief Returns the execution affinity setting for the current context.
- *
- * Returns in \p *pExecAffinity the current value of \p type. The supported
- * ::CUexecAffinityType values are:
- * - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: number of SMs the context is limited to use.
- *
- * \param type - Execution affinity type to query
- * \param pExecAffinity - Returned execution affinity
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY
- * \notefnerr
- *
- * \sa
- * ::CUexecAffinityParam
- */
- CUresult CUDAAPI cuCtxGetExecAffinity(CUexecAffinityParam *pExecAffinity, CUexecAffinityType type);
- /**
- * \brief Records an event.
- *
- * Captures in \p hEvent all the activities of the context \p hCtx
- * at the time of this call. \p hEvent and \p hCtx must be from the same
- * CUDA context, otherwise ::CUDA_ERROR_INVALID_HANDLE will be returned.
- * Calls such as ::cuEventQuery() or ::cuCtxWaitEvent() will then examine
- * or wait for completion of the work that was captured.
- * Uses of \p hCtx after this call do not modify \p hEvent.
- * If the context passed to \p hCtx is the primary context, \p hEvent will
- * capture all the activities of the primary context and its green contexts.
- * If the context passed to \p hCtx is a context converted from green context
- * via ::cuCtxFromGreenCtx(), \p hEvent will capture only the activities of the green context.
- *
- * \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED if the
- * specified context \p hCtx has a stream in the capture mode. In such a case,
- * the call will invalidate all the conflicting captures.
- *
- * \param hCtx - Context to record event for
- * \param hEvent - Event to record
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
- *
- * \sa
- * ::cuCtxWaitEvent,
- * ::cuGreenCtxRecordEvent,
- * ::cuGreenCtxWaitEvent,
- * ::cuEventRecord
- */
- CUresult CUDAAPI cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent);
- /**
- * \brief Make a context wait on an event
- *
- * Makes all future work submitted to context \p hCtx wait for all work
- * captured in \p hEvent. The synchronization will be performed on the device
- * and will not block the calling CPU thread. See ::cuCtxRecordEvent()
- * for details on what is captured by an event.
- * If the context passed to \p hCtx is the primary context, the primary context
- * and its green contexts will wait for \p hEvent.
- * If the context passed to \p hCtx is a context converted from green context
- * via ::cuCtxFromGreenCtx(), the green context will wait for \p hEvent.
- *
- * \note \p hEvent may be from a different context or device than \p hCtx.
- *
- * \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED and
- * invalidate the capture if the specified event \p hEvent is part of an ongoing
- * capture sequence or if the specified context \p hCtx has a stream in the capture mode.
- *
- * \param hCtx - Context to wait
- * \param hEvent - Event to wait on
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
- *
- * \sa
- * ::cuCtxRecordEvent,
- * ::cuGreenCtxRecordEvent,
- * ::cuGreenCtxWaitEvent,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent);
- /** @} */ /* END CUDA_CTX */
- /**
- * \defgroup CUDA_CTX_DEPRECATED Context Management [DEPRECATED]
- *
- * ___MANBRIEF___ deprecated context management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the deprecated context management functions of the low-level
- * CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Increment a context's usage-count
- *
- * \deprecated
- *
- * Note that this function is deprecated and should not be used.
- *
- * Increments the usage count of the context and passes back a context handle
- * in \p *pctx that must be passed to ::cuCtxDetach() when the application is
- * done with the context. ::cuCtxAttach() fails if there is no context current
- * to the thread.
- *
- * Currently, the \p flags parameter must be 0.
- *
- * \param pctx - Returned context handle of the current context
- * \param flags - Context attach flags (must be 0)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxDetach,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
- /**
- * \brief Decrement a context's usage-count
- *
- * \deprecated
- *
- * Note that this function is deprecated and should not be used.
- *
- * Decrements the usage count of the context \p ctx, and destroys the context
- * if the usage count goes to 0. The context must be a handle that was passed
- * back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the
- * calling thread.
- *
- * \param ctx - Context to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
- /**
- * \brief Returns the current shared memory configuration for the current context.
- *
- * \deprecated
- *
- * This function will return in \p pConfig the current size of shared memory banks
- * in the current context. On devices with configurable shared memory banks,
- * ::cuCtxSetSharedMemConfig can be used to change this setting, so that all
- * subsequent kernel launches will by default use the new bank size. When
- * ::cuCtxGetSharedMemConfig is called on devices without configurable shared
- * memory, it will return the fixed bank size of the hardware.
- *
- * The returned bank configurations can be either:
- * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is
- * four bytes.
- * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width is
- * eight bytes.
- *
- * \param pConfig - returned shared memory configuration
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cuCtxSetSharedMemConfig,
- * ::cuFuncSetCacheConfig,
- * ::cudaDeviceGetSharedMemConfig
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig);
- /**
- * \brief Sets the shared memory configuration for the current context.
- *
- * \deprecated
- *
- * On devices with configurable shared memory banks, this function will set
- * the context's shared memory bank size which is used for subsequent kernel
- * launches.
- *
- * Changing the shared memory configuration between launches may insert a
- * device-side synchronization point between those launches.
- *
- * Changing the shared memory bank size will not increase shared memory usage
- * or affect occupancy of kernels, but may have major effects on performance.
- * Larger bank sizes will allow for greater potential bandwidth to shared memory,
- * but will change what kinds of accesses to shared memory will result in bank
- * conflicts.
- *
- * This function will do nothing on devices with fixed shared memory bank size.
- *
- * The supported bank configurations are:
- * - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: set bank width to the default initial
- * setting (currently, four bytes).
- * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to
- * be natively four bytes.
- * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to
- * be natively eight bytes.
- *
- * \param config - requested shared memory configuration
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxCreate,
- * ::cuCtxDestroy,
- * ::cuCtxGetApiVersion,
- * ::cuCtxGetCacheConfig,
- * ::cuCtxGetDevice,
- * ::cuCtxGetFlags,
- * ::cuCtxGetLimit,
- * ::cuCtxPopCurrent,
- * ::cuCtxPushCurrent,
- * ::cuCtxSetLimit,
- * ::cuCtxSynchronize,
- * ::cuCtxGetSharedMemConfig,
- * ::cuFuncSetCacheConfig,
- * ::cudaDeviceSetSharedMemConfig
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config);
- /** @} */ /* END CUDA_CTX_DEPRECATED */
- /**
- * \defgroup CUDA_MODULE Module Management
- *
- * ___MANBRIEF___ module management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the module management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Loads a compute module
- *
- * Takes a filename \p fname and loads the corresponding module \p module into
- * the current context. The CUDA driver API does not attempt to lazily
- * allocate the resources needed by a module; if the memory for functions and
- * data (constant and global) needed by the module cannot be allocated,
- * ::cuModuleLoad() fails. The file should be a \e cubin file as output by
- * \b nvcc, or a \e PTX file either as output by \b nvcc or handwritten, or
- * a \e fatbin file as output by \b nvcc from toolchain 4.0 or later.
- *
- * \param module - Returned module
- * \param fname - Filename of module to load
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_FILE_NOT_FOUND,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
- /**
- * \brief Load a module's data
- *
- * Takes a pointer \p image and loads the corresponding module \p module into
- * the current context. The \p image may be a \e cubin or \e fatbin
- * as output by \b nvcc, or a NULL-terminated \e PTX, either as output by \b nvcc
- * or hand-written.
- *
- * \param module - Returned module
- * \param image - Module data to load
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
- /**
- * \brief Load a module's data with options
- *
- * Takes a pointer \p image and loads the corresponding module \p module into
- * the current context. The \p image may be a \e cubin or \e fatbin
- * as output by \b nvcc, or a NULL-terminated \e PTX, either as output by \b nvcc
- * or hand-written.
- *
- * \param module - Returned module
- * \param image - Module data to load
- * \param numOptions - Number of options
- * \param options - Options for JIT
- * \param optionValues - Option values for JIT
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
- /**
- * \brief Load a module's data
- *
- * Takes a pointer \p fatCubin and loads the corresponding module \p module
- * into the current context. The pointer represents a <i>fat binary</i> object,
- * which is a collection of different \e cubin and/or \e PTX files, all
- * representing the same device code, but compiled and optimized for different
- * architectures.
- *
- * Prior to CUDA 4.0, there was no documented API for constructing and using
- * fat binary objects by programmers. Starting with CUDA 4.0, fat binary
- * objects can be constructed by providing the <i>-fatbin option</i> to \b nvcc.
- * More information can be found in the \b nvcc document.
- *
- * \param module - Returned module
- * \param fatCubin - Fat binary to load
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
- /**
- * \brief Unloads a module
- *
- * Unloads a module \p hmod from the current context. Attempting to unload
- * a module which was obtained from the Library Management API such as
- * ::cuLibraryGetModule will return ::CUDA_ERROR_NOT_PERMITTED.
- *
- * \param hmod - Module to unload
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED
- * \notefnerr
- * \note_destroy_ub
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary
- */
- CUresult CUDAAPI cuModuleUnload(CUmodule hmod);
- /**
- * CUDA Lazy Loading status
- *
- * Module loading mode as reported by ::cuModuleGetLoadingMode.
- */
- typedef enum CUmoduleLoadingMode_enum {
- CU_MODULE_EAGER_LOADING = 0x1, /**< Eager loading: Lazy Kernel Loading is not enabled */
- CU_MODULE_LAZY_LOADING = 0x2, /**< Lazy loading: Lazy Kernel Loading is enabled */
- } CUmoduleLoadingMode;
- /**
- * \brief Query lazy loading mode
- *
- * Returns the lazy loading mode in \p *mode.
- * The module loading mode is controlled by the CUDA_MODULE_LOADING environment variable.
- *
- * \param mode - Returns the lazy loading mode
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuModuleLoad
- */
- CUresult CUDAAPI cuModuleGetLoadingMode(CUmoduleLoadingMode *mode);
- /**
- * \brief Returns a function handle
- *
- * Returns in \p *hfunc the handle of the function of name \p name located in
- * module \p hmod. If no function of that name exists, ::cuModuleGetFunction()
- * returns ::CUDA_ERROR_NOT_FOUND.
- *
- * \param hfunc - Returned function handle
- * \param hmod - Module to retrieve function from
- * \param name - Name of function to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
- /**
- * \brief Returns the number of functions within a module
- *
- * Returns in \p count the number of functions in \p mod.
- *
- * \param count - Number of functions found within the module
- * \param mod - Module to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- */
- CUresult CUDAAPI cuModuleGetFunctionCount(unsigned int *count, CUmodule mod);
- /**
- * \brief Returns the function handles within a module.
- *
- * Returns in \p functions a maximum number of \p numFunctions function handles within \p mod. When
- * function loading mode is set to LAZY the function retrieved may be partially loaded. The loading
- * state of a function can be queried using ::cuFuncIsLoaded. CUDA APIs may load the function
- * automatically when called with a partially loaded function handle, which may incur additional
- * latency. Alternatively, ::cuFuncLoad can be used to explicitly load a function. The returned
- * function handles become invalid when the module is unloaded.
- *
- * \param functions - Buffer where the function handles are returned to
- * \param numFunctions - Maximum number of function handles that may be returned to the buffer
- * \param mod - Module to query from
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetFunctionCount,
- * ::cuFuncIsLoaded,
- * ::cuFuncLoad
- */
- CUresult CUDAAPI cuModuleEnumerateFunctions(CUfunction *functions, unsigned int numFunctions, CUmodule mod);
- /**
- * \brief Returns a global pointer from a module
- *
- * Returns in \p *dptr and \p *bytes the base pointer and size of the
- * global of name \p name located in module \p hmod. If no variable of that name
- * exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND.
- * One of the parameters \p dptr or \p bytes (not both) can be NULL in which
- * case it is ignored.
- *
- * \param dptr - Returned global device pointer
- * \param bytes - Returned global size in bytes
- * \param hmod - Module to retrieve global from
- * \param name - Name of global to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuModuleGetFunction,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload,
- * ::cudaGetSymbolAddress,
- * ::cudaGetSymbolSize
- */
- CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
- /**
- * \brief Creates a pending JIT linker invocation.
- *
- * If the call is successful, the caller owns the returned CUlinkState, which
- * should eventually be destroyed with ::cuLinkDestroy. The
- * device code machine size (32 or 64 bit) will match the calling application.
- *
- * Both linker and compiler options may be specified. Compiler options will
- * be applied to inputs to this linker action which must be compiled from PTX.
- * The options ::CU_JIT_WALL_TIME,
- * ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, and ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
- * will accumulate data until the CUlinkState is destroyed.
- *
- * The data passed in via ::cuLinkAddData and ::cuLinkAddFile will be treated
- * as relocatable (-rdc=true to nvcc) when linking the final cubin during
- * ::cuLinkComplete and will have similar consequences as offline relocatable
- * device code linking.
- *
- * \p optionValues must remain valid for the life of the CUlinkState if output
- * options are used. No other references to inputs are maintained after this
- * call returns.
- *
- * \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
- *
- * \param numOptions Size of options arrays
- * \param options Array of linker and compiler options
- * \param optionValues Array of option values, each cast to void *
- * \param stateOut On success, this will contain a CUlinkState to specify
- * and complete this action
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND
- * \notefnerr
- *
- * \sa ::cuLinkAddData,
- * ::cuLinkAddFile,
- * ::cuLinkComplete,
- * ::cuLinkDestroy
- */
- CUresult CUDAAPI
- cuLinkCreate(unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
- /**
- * \brief Add an input to a pending linker invocation
- *
- * Ownership of \p data is retained by the caller. No reference is retained to any
- * inputs after this call returns.
- *
- * This method accepts only compiler options, which are used if the data must
- * be compiled from PTX, and does not accept any of
- * ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER,
- * ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET.
- *
- * \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
- *
- * \param state A pending linker action.
- * \param type The type of the input data.
- * \param data The input data. PTX must be NULL-terminated.
- * \param size The length of the input data.
- * \param name An optional name for this input in log messages.
- * \param numOptions Size of options.
- * \param options Options to be applied only for this input (overrides options from ::cuLinkCreate).
- * \param optionValues Array of option values, each cast to void *.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU
- *
- * \sa ::cuLinkCreate,
- * ::cuLinkAddFile,
- * ::cuLinkComplete,
- * ::cuLinkDestroy
- */
- CUresult CUDAAPI
- cuLinkAddData(CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name,
- unsigned int numOptions, CUjit_option *options, void **optionValues);
- /**
- * \brief Add a file input to a pending linker invocation
- *
- * No reference is retained to any inputs after this call returns.
- *
- * This method accepts only compiler options, which are used if the input
- * must be compiled from PTX, and does not accept any of
- * ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER,
- * ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET.
- *
- * This method is equivalent to invoking ::cuLinkAddData on the contents
- * of the file.
- *
- * \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
- *
- * \param state A pending linker action
- * \param type The type of the input data
- * \param path Path to the input file
- * \param numOptions Size of options
- * \param options Options to be applied only for this input (overrides options from ::cuLinkCreate)
- * \param optionValues Array of option values, each cast to void *
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_FILE_NOT_FOUND,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU
- *
- * \sa ::cuLinkCreate,
- * ::cuLinkAddData,
- * ::cuLinkComplete,
- * ::cuLinkDestroy
- */
- CUresult CUDAAPI
- cuLinkAddFile(CUlinkState state, CUjitInputType type, const char *path,
- unsigned int numOptions, CUjit_option *options, void **optionValues);
- /**
- * \brief Complete a pending linker invocation
- *
- * Completes the pending linker action and returns the cubin image for the linked
- * device code, which can be used with ::cuModuleLoadData. The cubin is owned by
- * \p state, so it should be loaded before \p state is destroyed via ::cuLinkDestroy.
- * This call does not destroy \p state.
- *
- * \param state A pending linker invocation
- * \param cubinOut On success, this will point to the output image
- * \param sizeOut Optional parameter to receive the size of the generated image
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuLinkCreate,
- * ::cuLinkAddData,
- * ::cuLinkAddFile,
- * ::cuLinkDestroy,
- * ::cuModuleLoadData
- */
- CUresult CUDAAPI
- cuLinkComplete(CUlinkState state, void **cubinOut, size_t *sizeOut);
- /**
- * \brief Destroys state for a JIT linker invocation.
- *
- * \param state State object for the linker invocation
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE
- *
- * \sa ::cuLinkCreate
- */
- CUresult CUDAAPI
- cuLinkDestroy(CUlinkState state);
- /** @} */ /* END CUDA_MODULE */
- /**
- * \defgroup CUDA_MODULE_DEPRECATED Module Management [DEPRECATED]
- *
- * ___MANBRIEF___ deprecated module management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the deprecated module management functions of the low-level
- * CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns a handle to a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pTexRef the handle of the texture reference of name \p name
- * in the module \p hmod. If no texture reference of that name exists,
- * ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture reference
- * handle should not be destroyed, since it will be destroyed when the module
- * is unloaded.
- *
- * \param pTexRef - Returned texture reference
- * \param hmod - Module to retrieve texture reference from
- * \param name - Name of texture reference to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_FOUND
- * \notefnerr
- *
- * \sa
- * ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetSurfRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
- /**
- * \brief Returns a handle to a surface reference
- *
- * \deprecated
- *
- * Returns in \p *pSurfRef the handle of the surface reference of name \p name
- * in the module \p hmod. If no surface reference of that name exists,
- * ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND.
- *
- * \param pSurfRef - Returned surface reference
- * \param hmod - Module to retrieve surface reference from
- * \param name - Name of surface reference to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_FOUND
- * \notefnerr
- *
- * \sa
- * ::cuModuleGetFunction,
- * ::cuModuleGetGlobal,
- * ::cuModuleGetTexRef,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx,
- * ::cuModuleLoadFatBinary,
- * ::cuModuleUnload
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
- /** @} */ /* END CUDA_MODULE_DEPRECATED */
- /**
- * \defgroup CUDA_LIBRARY Library Management
- *
- * ___MANBRIEF___ library management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the library management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Load a library with specified code and options
- *
- * Takes a pointer \p code and loads the corresponding library \p library based on
- * the application defined library loading mode:
- * - If module loading is set to EAGER, via the environment variables described in "Module loading",
- * \p library is loaded eagerly into all contexts at the time of the call and future contexts
- * at the time of creation until the library is unloaded with ::cuLibraryUnload().
- * - If the environment variables are set to LAZY, \p library
- * is not immediately loaded onto all existent contexts and will only be
- * loaded when a function is needed for that context, such as a kernel launch.
- *
- * These environment variables are described in the CUDA programming guide under the
- * "CUDA environment variables" section.
- *
- * The \p code may be a \e cubin or \e fatbin as output by \b nvcc,
- * or a NULL-terminated \e PTX, either as output by \b nvcc or hand-written.
- * A fatbin should also contain relocatable code when doing separate compilation.
- *
- * Options are passed as an array via \p jitOptions and any corresponding parameters are passed in
- * \p jitOptionsValues. The number of total JIT options is supplied via \p numJitOptions.
- * Any outputs will be returned via \p jitOptionsValues.
- *
- * Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in
- * \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions.
- *
- * \note If the library contains managed variables and no device in the system
- * supports managed variables this call is expected to return ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \param library - Returned library
- * \param code - Code to load
- * \param jitOptions - Options for JIT
- * \param jitOptionsValues - Option values for JIT
- * \param numJitOptions - Number of options
- * \param libraryOptions - Options for loading
- * \param libraryOptionValues - Option values for loading
- * \param numLibraryOptions - Number of options for loading
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx
- */
- CUresult CUDAAPI cuLibraryLoadData(CUlibrary *library, const void *code,
- CUjit_option *jitOptions, void **jitOptionsValues, unsigned int numJitOptions,
- CUlibraryOption *libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions);
- /**
- * \brief Load a library with specified file and options
- *
- * Takes a file name \p fileName and loads the corresponding library \p library based on
- * the application defined library loading mode:
- * - If module loading is set to EAGER, via the environment variables described in "Module loading",
- * \p library is loaded eagerly into all contexts at the time of the call and future contexts
- * at the time of creation until the library is unloaded with ::cuLibraryUnload().
- * - If the environment variables are set to LAZY, \p library
- * is not immediately loaded onto all existent contexts and will only be
- * loaded when a function is needed for that context, such as a kernel launch.
- *
- * These environment variables are described in the CUDA programming guide under the
- * "CUDA environment variables" section.
- *
- * The file should be a \e cubin file as output by \b nvcc, or a \e PTX file either
- * as output by \b nvcc or handwritten, or a \e fatbin file as output by \b nvcc.
- * A fatbin should also contain relocatable code when doing separate compilation.
- *
- * Options are passed as an array via \p jitOptions and any corresponding parameters are
- * passed in \p jitOptionsValues. The number of total options is supplied via \p numJitOptions.
- * Any outputs will be returned via \p jitOptionsValues.
- *
- * Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in
- * \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions.
- *
- * \note If the library contains managed variables and no device in the system
- * supports managed variables this call is expected to return ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \param library - Returned library
- * \param fileName - File to load from
- * \param jitOptions - Options for JIT
- * \param jitOptionsValues - Option values for JIT
- * \param numJitOptions - Number of options
- * \param libraryOptions - Options for loading
- * \param libraryOptionValues - Option values for loading
- * \param numLibraryOptions - Number of options for loading
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_PTX,
- * ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
- * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryUnload,
- * ::cuModuleLoad,
- * ::cuModuleLoadData,
- * ::cuModuleLoadDataEx
- */
- CUresult CUDAAPI cuLibraryLoadFromFile(CUlibrary *library, const char *fileName,
- CUjit_option *jitOptions, void **jitOptionsValues, unsigned int numJitOptions,
- CUlibraryOption *libraryOptions, void **libraryOptionValues, unsigned int numLibraryOptions);
- /**
- * \brief Unloads a library
- *
- * Unloads the library specified with \p library
- *
- * \param library - Library to unload
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuModuleUnload
- */
- CUresult CUDAAPI cuLibraryUnload(CUlibrary library);
- /**
- * \brief Returns a kernel handle
- *
- * Returns in \p pKernel the handle of the kernel with name \p name located in library \p library.
- * If kernel handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND.
- *
- * \param pKernel - Returned kernel handle
- * \param library - Library to retrieve kernel from
- * \param name - Name of kernel to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuKernelGetFunction,
- * ::cuLibraryGetModule,
- * ::cuModuleGetFunction
- */
- CUresult CUDAAPI cuLibraryGetKernel(CUkernel *pKernel, CUlibrary library, const char *name);
- /**
- * \brief Returns the number of kernels within a library
- *
- * Returns in \p count the number of kernels in \p lib.
- *
- * \param count - Number of kernels found within the library
- * \param lib - Library to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- */
- CUresult CUDAAPI cuLibraryGetKernelCount(unsigned int *count, CUlibrary lib);
-
- /**
- * \brief Retrieve the kernel handles within a library.
- *
- * Returns in \p kernels a maximum number of \p numKernels kernel handles within \p lib.
- * The returned kernel handle becomes invalid when the library is unloaded.
- *
- * \param kernels - Buffer where the kernel handles are returned to
- * \param numKernels - Maximum number of kernel handles that may be returned to the buffer
- * \param lib - Library to query from
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuLibraryGetKernelCount
- */
- CUresult CUDAAPI cuLibraryEnumerateKernels(CUkernel *kernels, unsigned int numKernels, CUlibrary lib);
- /**
- * \brief Returns a module handle
- *
- * Returns in \p pMod the module handle associated with the current context located in
- * library \p library. If module handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND.
- *
- * \param pMod - Returned module handle
- * \param library - Library to retrieve module from
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuModuleGetFunction
- */
- CUresult CUDAAPI cuLibraryGetModule(CUmodule *pMod, CUlibrary library);
- /**
- * \brief Returns a function handle
- *
- * Returns in \p pFunc the handle of the function for the requested kernel \p kernel and
- * the current context. If function handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND.
- *
- * \param pFunc - Returned function handle
- * \param kernel - Kernel to retrieve function for the requested context
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuLibraryGetKernel,
- * ::cuLibraryGetModule,
- * ::cuModuleGetFunction
- */
- CUresult CUDAAPI cuKernelGetFunction(CUfunction *pFunc, CUkernel kernel);
- /**
- * \brief Returns a library handle
- *
- * Returns in \p pLib the handle of the library for the requested kernel \p kernel
- *
- * \param pLib - Returned library handle
- * \param kernel - Kernel to retrieve library handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuLibraryGetKernel
- */
- CUresult CUDAAPI cuKernelGetLibrary(CUlibrary *pLib, CUkernel kernel);
- /**
- * \brief Returns a global device pointer
- *
- * Returns in \p *dptr and \p *bytes the base pointer and size of the global with
- * name \p name for the requested library \p library and the current context.
- * If no global for the requested name \p name exists, the call returns ::CUDA_ERROR_NOT_FOUND.
- * One of the parameters \p dptr or \p bytes (not both) can be NULL in which
- * case it is ignored.
- *
- * \param dptr - Returned global device pointer for the requested context
- * \param bytes - Returned global size in bytes
- * \param library - Library to retrieve global from
- * \param name - Name of global to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuLibraryGetModule,
- * ::cuModuleGetGlobal
- */
- CUresult CUDAAPI cuLibraryGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUlibrary library, const char *name);
- /**
- * \brief Returns a pointer to managed memory
- *
- * Returns in \p *dptr and \p *bytes the base pointer and size of the managed memory with
- * name \p name for the requested library \p library. If no managed memory with the
- * requested name \p name exists, the call returns ::CUDA_ERROR_NOT_FOUND. One of the parameters
- * \p dptr or \p bytes (not both) can be NULL in which case it is ignored.
- * Note that managed memory for library \p library is shared across devices and is registered
- * when the library is loaded into at least one context.
- *
- * \param dptr - Returned pointer to the managed memory
- * \param bytes - Returned memory size in bytes
- * \param library - Library to retrieve managed memory from
- * \param name - Name of managed memory to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload
- */
- CUresult CUDAAPI cuLibraryGetManaged(CUdeviceptr *dptr, size_t *bytes, CUlibrary library, const char *name);
- /**
- * \brief Returns a pointer to a unified function
- *
- * Returns in \p *fptr the function pointer to a unified function denoted by \p symbol.
- * If no unified function with name \p symbol exists, the call returns ::CUDA_ERROR_NOT_FOUND.
- * If there is no device with attribute ::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS present in the system,
- * the call may return ::CUDA_ERROR_NOT_FOUND.
- *
- * \param fptr - Returned pointer to a unified function
- * \param library - Library to retrieve function pointer memory from
- * \param symbol - Name of function pointer to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_FOUND
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload
- */
- CUresult CUDAAPI cuLibraryGetUnifiedFunction(void **fptr, CUlibrary library, const char *symbol);
- /**
- * \brief Returns information about a kernel
- *
- * Returns in \p *pi the integer value of the attribute \p attrib for the kernel
- * \p kernel for the requested device \p dev. The supported attributes are:
- * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
- * per block, beyond which a launch of the kernel would fail. This number
- * depends on both the kernel and the requested device.
- * - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of
- * statically-allocated shared memory per block required by this kernel.
- * This does not include dynamically-allocated shared memory requested by
- * the user at runtime.
- * - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated
- * constant memory required by this kernel.
- * - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory
- * used by each thread of this kernel.
- * - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread
- * of this kernel.
- * - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for
- * which the kernel was compiled. This value is the major PTX version * 10
- * + the minor PTX version, so a PTX version 1.3 function would return the
- * value 13. Note that this may return the undefined value of 0 for cubins
- * compiled prior to CUDA 3.0.
- * - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for
- * which the kernel was compiled. This value is the major binary
- * version * 10 + the minor binary version, so a binary version 1.3 function
- * would return the value 13. Note that this will return a value of 10 for
- * legacy cubins that do not have a properly-encoded binary architecture
- * version.
- * - ::CU_FUNC_ATTRIBUTE_CACHE_MODE_CA: The attribute to indicate whether the kernel has
- * been compiled with user specified option "-Xptxas --dlcm=ca" set.
- * - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: The maximum size in bytes of
- * dynamically-allocated shared memory.
- * - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: Preferred shared memory-L1
- * cache split ratio in percent of total shared memory.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET: If this attribute is set, the
- * kernel must launch with a valid cluster size specified.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether
- * the function can be launched with non-portable cluster size. 1 is allowed,
- * 0 is disallowed. A non-portable cluster size may only function on the
- * specific SKUs the program is tested on. The launch might fail if the
- * program is run on a different hardware platform. CUDA API provides
- * cudaOccupancyMaxActiveClusters to assist with checking whether the desired
- * size can be launched on the current device. A portable cluster size is
- * guaranteed to be functional on all compute capabilities higher than the
- * target compute capability. The portable cluster size for sm_90 is 8 blocks
- * per cluster. This value may increase for future compute capabilities. The
- * specific hardware unit may support higher cluster sizes that are not
- * guaranteed to be portable.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block
- * scheduling policy of a function. The value type is CUclusterSchedulingPolicy.
- *
- * \note If another thread is trying to set the same attribute on the same device using
- * ::cuKernelSetAttribute() simultaneously, the attribute query will give the old or new
- * value depending on the interleavings chosen by the OS scheduler and memory consistency.
- *
- * \param pi - Returned attribute value
- * \param attrib - Attribute requested
- * \param kernel - Kernel to query attribute of
- * \param dev - Device to query attribute of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuKernelSetAttribute,
- * ::cuLibraryGetKernel,
- * ::cuLaunchKernel,
- * ::cuKernelGetFunction,
- * ::cuLibraryGetModule,
- * ::cuModuleGetFunction,
- * ::cuFuncGetAttribute
- */
- CUresult CUDAAPI cuKernelGetAttribute(int *pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev);
- /**
- * \brief Sets information about a kernel
- *
- * This call sets the value of a specified attribute \p attrib on the kernel \p kernel
- * for the requested device \p dev to an integer value specified by \p val.
- * This function returns ::CUDA_SUCCESS if the new value of the attribute could be
- * successfully set. If the set fails, this call will return an error.
- * Not all attributes can have values set. Attempting to set a value on a read-only
- * attribute will result in an error (::CUDA_ERROR_INVALID_VALUE).
- *
- * Note that attributes set using ::cuFuncSetAttribute() will override the attribute
- * set by this API irrespective of whether the call to ::cuFuncSetAttribute() is made
- * before or after this API call. However, ::cuKernelGetAttribute() will always
- * return the attribute value set by this API.
- *
- * Supported attributes are:
- * - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This is the maximum size in bytes of
- * dynamically-allocated shared memory. The value should contain the requested
- * maximum size of dynamically-allocated shared memory. The sum of this value and
- * the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the
- * device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN.
- * The maximal size of requestable dynamic shared memory may differ by GPU
- * architecture.
- * - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1
- * cache and shared memory use the same hardware resources, this sets the shared memory
- * carveout preference, in percent of the total shared memory.
- * See ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR
- * This is only a hint, and the driver can choose a different ratio if required to execute the function.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether
- * the function can be launched with non-portable cluster size. 1 is allowed,
- * 0 is disallowed.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block
- * scheduling policy of a function. The value type is CUclusterSchedulingPolicy.
- *
- * \note The API has stricter locking requirements in comparison to its legacy counterpart
- * ::cuFuncSetAttribute() due to device-wide semantics. If multiple threads are trying to
- * set the same attribute on the same device simultaneously, the attribute setting will depend
- * on the interleavings chosen by the OS scheduler and memory consistency.
- *
- * \param attrib - Attribute requested
- * \param val - Value to set
- * \param kernel - Kernel to set attribute of
- * \param dev - Device to set attribute of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuKernelGetAttribute,
- * ::cuLibraryGetKernel,
- * ::cuLaunchKernel,
- * ::cuKernelGetFunction,
- * ::cuLibraryGetModule,
- * ::cuModuleGetFunction,
- * ::cuFuncSetAttribute
- */
- CUresult CUDAAPI cuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev);
- /**
- * \brief Sets the preferred cache configuration for a device kernel.
- *
- * On devices where the L1 cache and shared memory use the same hardware
- * resources, this sets through \p config the preferred cache configuration for
- * the device kernel \p kernel on the requested device \p dev. This is only a preference.
- * The driver will use the requested configuration if possible, but it is free to choose a different
- * configuration if required to execute \p kernel. Any context-wide preference
- * set via ::cuCtxSetCacheConfig() will be overridden by this per-kernel
- * setting.
- *
- * Note that attributes set using ::cuFuncSetCacheConfig() will override the attribute
- * set by this API irrespective of whether the call to ::cuFuncSetCacheConfig() is made
- * before or after this API call.
- *
- * This setting does nothing on devices where the size of the L1 cache and
- * shared memory are fixed.
- *
- * Launching a kernel with a different preference than the most recent
- * preference setting may insert a device-side synchronization point.
- *
- *
- * The supported cache configurations are:
- * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
- * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
- * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
- * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
- *
- * \note The API has stricter locking requirements in comparison to its legacy counterpart
- * ::cuFuncSetCacheConfig() due to device-wide semantics. If multiple threads are trying to
- * set a config on the same device simultaneously, the cache config setting will depend
- * on the interleavings chosen by the OS scheduler and memory consistency.
- *
- * \param kernel - Kernel to configure cache for
- * \param config - Requested cache configuration
- * \param dev - Device to set attribute of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuLibraryLoadData,
- * ::cuLibraryLoadFromFile,
- * ::cuLibraryUnload,
- * ::cuLibraryGetKernel,
- * ::cuKernelGetFunction,
- * ::cuLibraryGetModule,
- * ::cuModuleGetFunction,
- * ::cuFuncSetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuLaunchKernel
- */
- CUresult CUDAAPI cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev);
- /**
- * \brief Returns the function name for a ::CUkernel handle
- *
- * Returns in \p **name the function name associated with the kernel handle \p hfunc.
- * The function name is returned as a null-terminated string. The returned name is only
- * valid when the kernel handle is valid. If the library is unloaded or reloaded, one
- * must call the API again to get the updated name. This API may return a mangled name if
- * the function is not declared as having C linkage. If either \p **name or \p hfunc
- * is NULL, ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \param name - The returned name of the function
- * \param hfunc - The function handle to retrieve the name for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- */
- CUresult CUDAAPI cuKernelGetName(const char **name, CUkernel hfunc);
- /**
- * \brief Returns the offset and size of a kernel parameter in the device-side parameter layout
- *
- * Queries the kernel parameter at \p paramIndex into \p kernel's list of parameters, and returns
- * in \p paramOffset and \p paramSize the offset and size, respectively, where the parameter
- * will reside in the device-side parameter layout. This information can be used to update kernel
- * node parameters from the device via ::cudaGraphKernelNodeSetParam() and
- * ::cudaGraphKernelNodeUpdatesApply(). \p paramIndex must be less than the number of parameters
- * that \p kernel takes. \p paramSize can be set to NULL if only the parameter offset is desired.
- *
- * \param kernel - The kernel to query
- * \param paramIndex - The parameter index to query
- * \param paramOffset - Returns the offset into the device-side parameter layout at which the parameter resides
- * \param paramSize - Optionally returns the size of the parameter in the device-side parameter layout
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncGetParamInfo
- */
- CUresult CUDAAPI cuKernelGetParamInfo(CUkernel kernel, size_t paramIndex, size_t *paramOffset, size_t *paramSize);
- /** @} */ /* END CUDA_LIBRARY */
- /**
- * \defgroup CUDA_MEM Memory Management
- *
- * ___MANBRIEF___ memory management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the memory management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Gets free and total memory
- *
- * Returns in \p *total the total amount of memory available to the current context.
- * Returns in \p *free the amount of memory on the device that is free according to the OS.
- * CUDA is not guaranteed to be able to allocate all of the memory that the OS reports as free.
- * In a multi-tenant situation, the free estimate returned is prone to a race condition: a new
- * allocation or free performed by a different process, or by a different thread in the same
- * process, between the time the free memory is estimated and the time it is reported will
- * cause the reported free value to deviate from the actual free memory.
- *
- * The integrated GPU on Tegra shares memory with the CPU and other components
- * of the SoC. The free and total values returned by the API exclude
- * the SWAP memory space maintained by the OS on some platforms.
- * The OS may move some of the memory pages into the swap area as the GPU or
- * CPU allocates or accesses memory. See the Tegra app note on how to calculate
- * total and free memory on Tegra.
- *
- * \param free - Returned free memory in bytes
- * \param total - Returned total memory in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemGetInfo
- */
- CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total);
- /**
- * \brief Allocates device memory
- *
- * Allocates \p bytesize bytes of linear memory on the device and returns in
- * \p *dptr a pointer to the allocated memory. The allocated memory is suitably
- * aligned for any kind of variable. The memory is not cleared. If \p bytesize
- * is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE.
- *
- * \param dptr - Returned device pointer
- * \param bytesize - Requested allocation size in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMalloc
- */
- CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
- /**
- * \brief Allocates pitched device memory
- *
- * Allocates at least \p WidthInBytes * \p Height bytes of linear memory on
- * the device and returns in \p *dptr a pointer to the allocated memory. The
- * function may pad the allocation to ensure that corresponding pointers in
- * any given row will continue to meet the alignment requirements for
- * coalescing as the address is updated from row to row. \p ElementSizeBytes
- * specifies the size of the largest reads and writes that will be performed
- * on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coalesced
- * memory transactions are not possible on other data sizes). If
- * \p ElementSizeBytes is smaller than the actual read/write size of a kernel,
- * the kernel will run correctly, but possibly at reduced speed. The pitch
- * returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of the
- * allocation. The intended usage of pitch is as a separate parameter of the
- * allocation, used to compute addresses within the 2D array. Given the row
- * and column of an array element of type \b T, the address is computed as:
- * \code
- T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
- * \endcode
- *
- * The pitch returned by ::cuMemAllocPitch() is guaranteed to work with
- * ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is
- * recommended that programmers consider performing pitch allocations using
- * ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is
- * especially true if the application will be performing 2D memory copies
- * between different regions of device memory (whether linear memory or CUDA
- * arrays).
- *
- * The byte alignment of the pitch returned by ::cuMemAllocPitch() is guaranteed
- * to match or exceed the alignment requirement for texture binding with
- * ::cuTexRefSetAddress2D().
- *
- * \param dptr - Returned device pointer
- * \param pPitch - Returned pitch of allocation in bytes
- * \param WidthInBytes - Requested allocation width in bytes
- * \param Height - Requested allocation height in rows
- * \param ElementSizeBytes - Size of largest reads/writes for range
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMallocPitch
- */
- CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
- /**
- * \brief Frees device memory
- *
- * Frees the memory space pointed to by \p dptr, which must have been returned
- * by a previous call to one of the following memory allocation APIs - ::cuMemAlloc(),
- * ::cuMemAllocPitch(), ::cuMemAllocManaged(), ::cuMemAllocAsync(), ::cuMemAllocFromPoolAsync().
- *
- * Note - This API will not perform any implicit synchronization when the pointer was allocated with
- * ::cuMemAllocAsync or ::cuMemAllocFromPoolAsync. Callers must ensure that all accesses to these
- * pointers have completed before invoking ::cuMemFree. For best performance and memory reuse, users
- * should use ::cuMemFreeAsync to free memory allocated via the stream ordered memory allocator.
- * For all other pointers, this API may perform implicit synchronization.
- *
- * \param dptr - Pointer to memory to free
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemAllocManaged, ::cuMemAllocAsync, ::cuMemAllocFromPoolAsync,
- * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, ::cuMemcpy3D, ::cuMemcpy3DAsync,
- * ::cuMemcpyAtoA, ::cuMemcpyAtoD, ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA,
- * ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA,
- * ::cuMemcpyHtoAAsync, ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, ::cuMemFreeAsync,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaFree
- */
- CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
- /**
- * \brief Get information on memory allocations
- *
- * Returns the base address in \p *pbase and size in \p *psize of the
- * allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the input
- * pointer \p dptr. Both parameters \p pbase and \p psize are optional. If one
- * of them is NULL, it is ignored.
- *
- * \param pbase - Returned base address
- * \param psize - Returned size of device memory allocation
- * \param dptr - Device pointer to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_NOT_FOUND,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
- */
- CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
- /**
- * \brief Allocates page-locked host memory
- *
- * Allocates \p bytesize bytes of host memory that is page-locked and
- * accessible to the device. The driver tracks the virtual memory ranges
- * allocated with this function and automatically accelerates calls to
- * functions such as ::cuMemcpy(). Since the memory can be accessed directly by
- * the device, it can be read or written with much higher bandwidth than
- * pageable memory obtained with functions such as ::malloc().
- *
- * On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
- * is true, ::cuMemAllocHost may not page-lock the allocated memory.
- *
- * Page-locking excessive amounts of memory with ::cuMemAllocHost() may degrade system
- * performance, since it reduces the amount of memory available to the system
- * for paging. As a result, this function is best used sparingly to allocate
- * staging areas for data exchange between host and device.
- *
- * Note all host memory allocated using ::cuMemAllocHost() will automatically
- * be immediately accessible to all contexts on all devices which support unified
- * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
- * The device pointer that may be used to access this host memory from those
- * contexts is always equal to the returned host pointer \p *pp.
- * See \ref CUDA_UNIFIED for additional details.
- *
- * \param pp - Returned pointer to host memory
- * \param bytesize - Requested allocation size in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMallocHost
- */
- CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize);
- /**
- * \brief Frees page-locked host memory
- *
- * Frees the memory space pointed to by \p p, which must have been returned by
- * a previous call to ::cuMemAllocHost().
- *
- * \param p - Pointer to memory to free
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaFreeHost
- */
- CUresult CUDAAPI cuMemFreeHost(void *p);
- /**
- * \brief Allocates page-locked host memory
- *
- * Allocates \p bytesize bytes of host memory that is page-locked and accessible
- * to the device. The driver tracks the virtual memory ranges allocated with
- * this function and automatically accelerates calls to functions such as
- * ::cuMemcpyHtoD(). Since the memory can be accessed directly by the device,
- * it can be read or written with much higher bandwidth than pageable memory
- * obtained with functions such as ::malloc().
- *
- * On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
- * is true, ::cuMemHostAlloc may not page-lock the allocated memory.
- *
- * Page-locking excessive amounts of memory may degrade system performance,
- * since it reduces the amount of memory available to the system for paging.
- * As a result, this function is best used sparingly to allocate staging areas
- * for data exchange between host and device.
- *
- * The \p Flags parameter enables different options to be specified that
- * affect the allocation, as follows.
- *
- * - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be
- * considered as pinned memory by all CUDA contexts, not just the one that
- * performed the allocation.
- *
- * - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address
- * space. The device pointer to the memory may be obtained by calling
- * ::cuMemHostGetDevicePointer().
- *
- * - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined
- * (WC). WC memory can be transferred across the PCI Express bus more
- * quickly on some system configurations, but cannot be read efficiently by
- * most CPUs. WC memory is a good option for buffers that will be written by
- * the CPU and read by the GPU via mapped pinned memory or host->device
- * transfers.
- *
- * All of these flags are orthogonal to one another: a developer may allocate
- * memory that is portable, mapped and/or write-combined with no restrictions.
- *
- * The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for
- * devices that do not support mapped pinned memory. The failure is deferred
- * to ::cuMemHostGetDevicePointer() because the memory may be mapped into
- * other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag.
- *
- * The memory allocated by this function must be freed with ::cuMemFreeHost().
- *
- * Note all host memory allocated using ::cuMemHostAlloc() will automatically
- * be immediately accessible to all contexts on all devices which support unified
- * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING).
- * Unless the flag ::CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device pointer
- * that may be used to access this host memory from those contexts is always equal
- * to the returned host pointer \p *pp. If the flag ::CU_MEMHOSTALLOC_WRITECOMBINED
- * is specified, then the function ::cuMemHostGetDevicePointer() must be used
- * to query the device pointer, even if the context supports unified addressing.
- * See \ref CUDA_UNIFIED for additional details.
- *
- * \param pp - Returned pointer to host memory
- * \param bytesize - Requested allocation size in bytes
- * \param Flags - Flags for allocation request
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaHostAlloc
- */
- CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
- /**
- * \brief Passes back device pointer of mapped pinned memory
- *
- * Passes back the device pointer \p pdptr corresponding to the mapped, pinned
- * host buffer \p p allocated by ::cuMemHostAlloc.
- *
- * ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP
- * flag was not specified at the time the memory was allocated, or if the
- * function is called on a GPU that does not support mapped pinned memory.
- *
- * For devices that have a non-zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, the memory
- * can also be accessed from the device using the host pointer \p p.
- * The device pointer returned by ::cuMemHostGetDevicePointer() may or may not
- * match the original host pointer \p p and depends on the devices visible to the
- * application. If all devices visible to the application have a non-zero value for the
- * device attribute, the device pointer returned by ::cuMemHostGetDevicePointer()
- * will match the original pointer \p p. If any device visible to the application
- * has a zero value for the device attribute, the device pointer returned by
- * ::cuMemHostGetDevicePointer() will not match the original host pointer \p p,
- * but it will be suitable for use on all devices provided Unified Virtual Addressing
- * is enabled. In such systems, it is valid to access the memory using either pointer
- * on devices that have a non-zero value for the device attribute. Note however that
- * such devices should access the memory using only one of the two pointers and not both.
- *
- * \p Flags is reserved for future releases. For now, it must be set to 0.
- *
- * \param pdptr - Returned device pointer
- * \param p - Host pointer
- * \param Flags - Options (must be 0)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaHostGetDevicePointer
- */
- CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
- /**
- * \brief Passes back flags that were used for a pinned allocation
- *
- * Passes back the flags \p pFlags that were specified when allocating
- * the pinned host buffer \p p allocated by ::cuMemHostAlloc.
- *
- * ::cuMemHostGetFlags() will fail if the pointer does not reside in
- * an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc().
- *
- * \param pFlags - Returned flags word
- * \param p - Host pointer
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuMemAllocHost,
- * ::cuMemHostAlloc,
- * ::cudaHostGetFlags
- */
- CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p);
- /**
- * \brief Allocates memory that will be automatically managed by the Unified Memory system
- *
- * Allocates \p bytesize bytes of managed memory on the device and returns in
- * \p *dptr a pointer to the allocated memory. If the device doesn't support
- * allocating managed memory, ::CUDA_ERROR_NOT_SUPPORTED is returned. Support
- * for managed memory can be queried using the device attribute
- * ::CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY. The allocated memory is suitably
- * aligned for any kind of variable. The memory is not cleared. If \p bytesize
- * is 0, ::cuMemAllocManaged returns ::CUDA_ERROR_INVALID_VALUE. The pointer
- * is valid on the CPU and on all GPUs in the system that support managed memory.
- * All accesses to this pointer must obey the Unified Memory programming model.
- *
- * \p flags specifies the default stream association for this allocation.
- * \p flags must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST. If
- * ::CU_MEM_ATTACH_GLOBAL is specified, then this memory is accessible from
- * any stream on any device. If ::CU_MEM_ATTACH_HOST is specified, then the
- * allocation should not be accessed from devices that have a zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS; an explicit call to
- * ::cuStreamAttachMemAsync will be required to enable access on such devices.
- *
- * If the association is later changed via ::cuStreamAttachMemAsync to
- * a single stream, the default association as specified during ::cuMemAllocManaged
- * is restored when that stream is destroyed. For __managed__ variables, the
- * default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a
- * stream is an asynchronous operation, and as a result, the change to default
- * association won't happen until all work in the stream has completed.
- *
- * Memory allocated with ::cuMemAllocManaged should be released with ::cuMemFree.
- *
- * Device memory oversubscription is possible for GPUs that have a non-zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Managed memory on
- * such GPUs may be evicted from device memory to host memory at any time by the Unified
- * Memory driver in order to make room for other allocations.
- *
- * In a system where all GPUs have a non-zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, managed memory may not be populated when this
- * API returns and instead may be populated on access. In such systems, managed memory can
- * migrate to any processor's memory at any time. The Unified Memory driver will employ heuristics to
- * maintain data locality and prevent excessive page faults to the extent possible. The application
- * can also guide the driver about memory usage patterns via ::cuMemAdvise. The application
- * can also explicitly migrate memory to a desired processor's memory via
- * ::cuMemPrefetchAsync.
- *
- * In a multi-GPU system where all of the GPUs have a zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS and all the GPUs have peer-to-peer support
- * with each other, the physical storage for managed memory is created on the GPU which is active
- * at the time ::cuMemAllocManaged is called. All other GPUs will reference the data at reduced
- * bandwidth via peer mappings over the PCIe bus. The Unified Memory driver does not migrate
- * memory among such GPUs.
- *
- * In a multi-GPU system where not all GPUs have peer-to-peer support with each other and
- * where the value of the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
- * is zero for at least one of those GPUs, the location chosen for physical storage of managed
- * memory is system-dependent.
- * - On Linux, the location chosen will be device memory as long as the current set of active
- * contexts are on devices that either have peer-to-peer support with each other or have a
- * non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * If there is an active context on a GPU that does not have a non-zero value for that device
- * attribute and it does not have peer-to-peer support with the other devices that have active
- * contexts on them, then the location for physical storage will be 'zero-copy' or host memory.
- * Note that this means that managed memory that is located in device memory is migrated to
- * host memory if a new context is created on a GPU that doesn't have a non-zero value for
- * the device attribute and does not support peer-to-peer with at least one of the other devices
- * that has an active context. This in turn implies that context creation may fail if there is
- * insufficient host memory to migrate all managed allocations.
- * - On Windows, the physical storage is always created in 'zero-copy' or host memory.
- * All GPUs will reference the data at reduced bandwidth over the PCIe bus. In these
- * circumstances, use of the environment variable CUDA_VISIBLE_DEVICES is recommended to
- * restrict CUDA to only use those GPUs that have peer-to-peer support.
- * Alternatively, users can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a
- * non-zero value to force the driver to always use device memory for physical storage.
- * When this environment variable is set to a non-zero value, all contexts created in
- * that process on devices that support managed memory have to be peer-to-peer compatible
- * with each other. Context creation will fail if a context is created on a device that
- * supports managed memory and is not peer-to-peer compatible with any of the other
- * managed memory supporting devices on which contexts were previously created, even if
- * those contexts have been destroyed. These environment variables are described
- * in the CUDA programming guide under the "CUDA environment variables" section.
- * - On ARM, managed memory is not available on the discrete GPU with Drive PX-2.
- *
- * \param dptr - Returned device pointer
- * \param bytesize - Requested allocation size in bytes
- * \param flags - Must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cuDeviceGetAttribute, ::cuStreamAttachMemAsync,
- * ::cudaMallocManaged
- */
- CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, unsigned int flags);
- /**
- * \brief Registers a callback function to receive async notifications
- *
- * Registers \p callbackFunc to receive async notifications.
- *
- * The \p userData parameter is passed to the callback function at async notification time.
- * Likewise, \p callback is also passed to the callback function to distinguish between
- * multiple registered callbacks.
- *
- * The callback function being registered should be designed to return quickly (~10ms).
- * Any long running tasks should be queued for execution on an application thread.
- *
- * Callbacks may not call cuDeviceRegisterAsyncNotification or cuDeviceUnregisterAsyncNotification.
- * Doing so will result in ::CUDA_ERROR_NOT_PERMITTED. Async notification callbacks execute
- * in an undefined order and may be serialized.
- *
- * Returns in \p *callback a handle representing the registered callback instance.
- *
- * \param device - The device on which to register the callback
- * \param callbackFunc - The function to register as a callback
- * \param userData - A generic pointer to user data. This is passed into the callback function.
- * \param callback - A handle representing the registered callback instance
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cuDeviceUnregisterAsyncNotification
- */
- CUresult CUDAAPI cuDeviceRegisterAsyncNotification(CUdevice device, CUasyncCallback callbackFunc, void *userData, CUasyncCallbackHandle *callback);
- /**
- * \brief Unregisters an async notification callback
- *
- * Unregisters \p callback so that the corresponding callback function will stop receiving
- * async notifications.
- *
- * \param device - The device from which to remove \p callback.
- * \param callback - The callback instance to unregister from receiving async notifications.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cuDeviceRegisterAsyncNotification
- */
- CUresult CUDAAPI cuDeviceUnregisterAsyncNotification(CUdevice device, CUasyncCallbackHandle callback);
- /**
- * \brief Returns a handle to a compute device
- *
- * Returns in \p *dev a device handle given a PCI bus ID string.
- *
- * \param dev - Returned device handle
- *
- * \param pciBusId - String in one of the following forms:
- * [domain]:[bus]:[device].[function]
- * [domain]:[bus]:[device]
- * [bus]:[device].[function]
- * where \p domain, \p bus, \p device, and \p function are all hexadecimal values
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGet,
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetPCIBusId,
- * ::cudaDeviceGetByPCIBusId
- */
- CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId);
- /**
- * \brief Returns a PCI Bus Id string for the device
- *
- * Returns an ASCII string identifying the device \p dev in the NULL-terminated
- * string pointed to by \p pciBusId. \p len specifies the maximum length of the
- * string that may be returned.
- *
- * \param pciBusId - Returned identifier string for the device in the following format
- * [domain]:[bus]:[device].[function]
- * where \p domain, \p bus, \p device, and \p function are all hexadecimal values.
- * pciBusId should be large enough to store 13 characters including the NULL-terminator.
- *
- * \param len - Maximum length of string to store in \p pciBusId
- *
- * \param dev - Device to get identifier string for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceGet,
- * ::cuDeviceGetAttribute,
- * ::cuDeviceGetByPCIBusId,
- * ::cudaDeviceGetPCIBusId
- */
- CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev);
- /**
- * \brief Gets an interprocess handle for a previously allocated event
- *
- * Takes as input a previously allocated event. This event must have been
- * created with the ::CU_EVENT_INTERPROCESS and ::CU_EVENT_DISABLE_TIMING
- * flags set. This opaque handle may be copied into other processes and
- * opened with ::cuIpcOpenEventHandle to allow efficient hardware
- * synchronization between GPU work in different processes.
- *
- * After the event has been opened in the importing process,
- * ::cuEventRecord, ::cuEventSynchronize, ::cuStreamWaitEvent and
- * ::cuEventQuery may be used in either process. Performing operations
- * on the imported event after the exported event has been freed
- * with ::cuEventDestroy will result in undefined behavior.
- *
- * IPC functionality is restricted to devices with support for unified
- * addressing on Linux and Windows operating systems.
- * IPC functionality on Windows is supported for compatibility purposes
- * but not recommended as it comes with performance cost.
- * Users can test their device for IPC functionality by calling
- * ::cuDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
- *
- * \param pHandle - Pointer to a user allocated CUipcEventHandle
- * in which to return the opaque event handle
- * \param event - Event allocated with ::CU_EVENT_INTERPROCESS and
- * ::CU_EVENT_DISABLE_TIMING flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_MAP_FAILED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuEventCreate,
- * ::cuEventDestroy,
- * ::cuEventSynchronize,
- * ::cuEventQuery,
- * ::cuStreamWaitEvent,
- * ::cuIpcOpenEventHandle,
- * ::cuIpcGetMemHandle,
- * ::cuIpcOpenMemHandle,
- * ::cuIpcCloseMemHandle,
- * ::cudaIpcGetEventHandle
- */
- CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event);
- /**
- * \brief Opens an interprocess event handle for use in the current process
- *
- * Opens an interprocess event handle exported from another process with
- * ::cuIpcGetEventHandle. This function returns a ::CUevent that behaves like
- * a locally created event with the ::CU_EVENT_DISABLE_TIMING flag specified.
- * This event must be freed with ::cuEventDestroy.
- *
- * Performing operations on the imported event after the exported event has
- * been freed with ::cuEventDestroy will result in undefined behavior.
- *
- * IPC functionality is restricted to devices with support for unified
- * addressing on Linux and Windows operating systems.
- * IPC functionality on Windows is supported for compatibility purposes
- * but not recommended as it comes with performance cost.
- * Users can test their device for IPC functionality by calling
- * ::cuDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
- *
- * \param phEvent - Returns the imported event
- * \param handle - Interprocess handle to open
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_MAP_FAILED,
- * ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuEventCreate,
- * ::cuEventDestroy,
- * ::cuEventSynchronize,
- * ::cuEventQuery,
- * ::cuStreamWaitEvent,
- * ::cuIpcGetEventHandle,
- * ::cuIpcGetMemHandle,
- * ::cuIpcOpenMemHandle,
- * ::cuIpcCloseMemHandle,
- * ::cudaIpcOpenEventHandle
- */
- CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, CUipcEventHandle handle);
- /**
- * \brief Gets an interprocess memory handle for an existing device memory
- * allocation
- *
- * Takes a pointer to the base of an existing device memory allocation created
- * with ::cuMemAlloc and exports it for use in another process. This is a
- * lightweight operation and may be called multiple times on an allocation
- * without adverse effects.
- *
- * If a region of memory is freed with ::cuMemFree and a subsequent call
- * to ::cuMemAlloc returns memory with the same device address,
- * ::cuIpcGetMemHandle will return a unique handle for the
- * new memory.
- *
- * IPC functionality is restricted to devices with support for unified
- * addressing on Linux and Windows operating systems.
- * IPC functionality on Windows is supported for compatibility purposes
- * but not recommended as it comes with performance cost.
- * Users can test their device for IPC functionality by calling
- * ::cuDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
- *
- * \param pHandle - Pointer to user allocated ::CUipcMemHandle to return
- * the handle in.
- * \param dptr - Base pointer to previously allocated device memory
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_MAP_FAILED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuMemAlloc,
- * ::cuMemFree,
- * ::cuIpcGetEventHandle,
- * ::cuIpcOpenEventHandle,
- * ::cuIpcOpenMemHandle,
- * ::cuIpcCloseMemHandle,
- * ::cudaIpcGetMemHandle
- */
- CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr);
- /**
- * \brief Opens an interprocess memory handle exported from another process
- * and returns a device pointer usable in the local process.
- *
- * Maps memory exported from another process with ::cuIpcGetMemHandle into
- * the current device address space. For contexts on different devices
- * ::cuIpcOpenMemHandle can attempt to enable peer access between the
- * devices as if the user called ::cuCtxEnablePeerAccess. This behavior is
- * controlled by the ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag.
- * ::cuDeviceCanAccessPeer can determine if a mapping is possible.
- *
- * Contexts that may open ::CUipcMemHandles are restricted in the following way.
- * ::CUipcMemHandles from each ::CUdevice in a given process may only be opened
- * by one ::CUcontext per ::CUdevice per other process.
- *
- * If the memory handle has already been opened by the current context, the
- * reference count on the handle is incremented by 1 and the existing device pointer
- * is returned.
- *
- * Memory returned from ::cuIpcOpenMemHandle must be freed with
- * ::cuIpcCloseMemHandle.
- *
- * Calling ::cuMemFree on an exported memory region before calling
- * ::cuIpcCloseMemHandle in the importing context will result in undefined
- * behavior.
- *
- * IPC functionality is restricted to devices with support for unified
- * addressing on Linux and Windows operating systems.
- * IPC functionality on Windows is supported for compatibility purposes
- * but not recommended as it comes with performance cost.
- * Users can test their device for IPC functionality by calling
- * ::cuDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
- *
- * \param pdptr - Returned device pointer
- * \param handle - ::CUipcMemHandle to open
- * \param Flags - Flags for this operation. Must be specified as ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_MAP_FAILED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_TOO_MANY_PEERS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \note No guarantees are made about the address returned in \p *pdptr.
- * In particular, multiple processes may not receive the same address for the same \p handle.
- *
- * \sa
- * ::cuMemAlloc,
- * ::cuMemFree,
- * ::cuIpcGetEventHandle,
- * ::cuIpcOpenEventHandle,
- * ::cuIpcGetMemHandle,
- * ::cuIpcCloseMemHandle,
- * ::cuCtxEnablePeerAccess,
- * ::cuDeviceCanAccessPeer,
- * ::cudaIpcOpenMemHandle
- */
- CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags);
- /**
- * \brief Attempts to close memory mapped with ::cuIpcOpenMemHandle
- *
- * Decrements the reference count of the memory returned by ::cuIpcOpenMemHandle by 1.
- * When the reference count reaches 0, this API unmaps the memory. The original allocation
- * in the exporting process as well as imported mappings in other processes
- * will be unaffected.
- *
- * Any resources used to enable peer access will be freed if this is the
- * last mapping using them.
- *
- * IPC functionality is restricted to devices with support for unified
- * addressing on Linux and Windows operating systems.
- * IPC functionality on Windows is supported for compatibility purposes
- * but not recommended as it comes with performance cost.
- * Users can test their device for IPC functionality by calling
- * ::cuDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
- *
- * \param dptr - Device pointer returned by ::cuIpcOpenMemHandle
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_MAP_FAILED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \sa
- * ::cuMemAlloc,
- * ::cuMemFree,
- * ::cuIpcGetEventHandle,
- * ::cuIpcOpenEventHandle,
- * ::cuIpcGetMemHandle,
- * ::cuIpcOpenMemHandle,
- * ::cudaIpcCloseMemHandle
- */
- CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr);
- /**
- * \brief Registers an existing host memory range for use by CUDA
- *
- * Page-locks the memory range specified by \p p and \p bytesize and maps it
- * for the device(s) as specified by \p Flags. This memory range also is added
- * to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate
- * calls to functions such as ::cuMemcpyHtoD(). Since the memory can be accessed
- * directly by the device, it can be read or written with much higher bandwidth
- * than pageable memory that has not been registered. Page-locking excessive
- * amounts of memory may degrade system performance, since it reduces the amount
- * of memory available to the system for paging. As a result, this function is
- * best used sparingly to register staging areas for data exchange between
- * host and device.
- *
- * On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
- * is true, ::cuMemHostRegister will not page-lock the memory range specified
- * by \p p but only populate unpopulated pages.
- *
- * The \p Flags parameter enables different options to be specified that
- * affect the allocation, as follows.
- *
- * - ::CU_MEMHOSTREGISTER_PORTABLE: The memory returned by this call will be
- * considered as pinned memory by all CUDA contexts, not just the one that
- * performed the allocation.
- *
- * - ::CU_MEMHOSTREGISTER_DEVICEMAP: Maps the allocation into the CUDA address
- * space. The device pointer to the memory may be obtained by calling
- * ::cuMemHostGetDevicePointer().
- *
- * - ::CU_MEMHOSTREGISTER_IOMEMORY: The pointer is treated as pointing to some
- * I/O memory space, e.g. the PCI Express resource of a 3rd party device.
- *
- * - ::CU_MEMHOSTREGISTER_READ_ONLY: The pointer is treated as pointing to memory
- * that is considered read-only by the device. On platforms without
- * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is
- * required in order to register memory mapped to the CPU as read-only. Support
- * for the use of this flag can be queried from the device attribute
- * ::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with
- * a current context associated with a device that does not have this attribute
- * set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED.
- *
- * All of these flags are orthogonal to one another: a developer may page-lock
- * memory that is portable or mapped with no restrictions.
- *
- * The ::CU_MEMHOSTREGISTER_DEVICEMAP flag may be specified on CUDA contexts for
- * devices that do not support mapped pinned memory. The failure is deferred
- * to ::cuMemHostGetDevicePointer() because the memory may be mapped into
- * other CUDA contexts via the ::CU_MEMHOSTREGISTER_PORTABLE flag.
- *
- * For devices that have a non-zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, the memory
- * can also be accessed from the device using the host pointer \p p.
- * The device pointer returned by ::cuMemHostGetDevicePointer() may or may not
- * match the original host pointer \p p and depends on the devices visible to the
- * application. If all devices visible to the application have a non-zero value for the
- * device attribute, the device pointer returned by ::cuMemHostGetDevicePointer()
- * will match the original pointer \p p. If any device visible to the application
- * has a zero value for the device attribute, the device pointer returned by
- * ::cuMemHostGetDevicePointer() will not match the original host pointer \p p,
- * but it will be suitable for use on all devices provided Unified Virtual Addressing
- * is enabled. In such systems, it is valid to access the memory using either pointer
- * on devices that have a non-zero value for the device attribute. Note however that
- * such devices should access the memory using only one of the two pointers and not both.
- *
- * The memory page-locked by this function must be unregistered with
- * ::cuMemHostUnregister().
- *
- * \param p - Host pointer to memory to page-lock
- * \param bytesize - Size in bytes of the address range to page-lock
- * \param Flags - Flags for allocation request
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa
- * ::cuMemHostUnregister,
- * ::cuMemHostGetFlags,
- * ::cuMemHostGetDevicePointer,
- * ::cudaHostRegister
- */
- CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, unsigned int Flags);
- /**
- * \brief Unregisters a memory range that was registered with cuMemHostRegister.
- *
- * Unmaps the memory range whose base address is specified by \p p, and makes
- * it pageable again.
- *
- * The base address must be the same one specified to ::cuMemHostRegister().
- *
- * \param p - Host pointer to memory to unregister
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
- * \notefnerr
- *
- * \sa
- * ::cuMemHostRegister,
- * ::cudaHostUnregister
- */
- CUresult CUDAAPI cuMemHostUnregister(void *p);
- /**
- * \brief Copies memory
- *
- * Copies data between two pointers.
- * \p dst and \p src are base pointers of the destination and source, respectively.
- * \p ByteCount specifies the number of bytes to copy.
- * Note that this function infers the type of the transfer (host to host, host to
- * device, device to device, or device to host) from the pointer values. This
- * function is only allowed in contexts which support unified addressing.
- *
- * \param dst - Destination unified virtual address space pointer
- * \param src - Source unified virtual address space pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy,
- * ::cudaMemcpyToSymbol,
- * ::cudaMemcpyFromSymbol
- */
- CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
- /**
- * \brief Copies device memory between two contexts
- *
- * Copies from device memory in one context to device memory in another
- * context. \p dstDevice is the base device pointer of the destination memory
- * and \p dstContext is the destination context. \p srcDevice is the base
- * device pointer of the source memory and \p srcContext is the source context.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param dstContext - Destination context
- * \param srcDevice - Source device pointer
- * \param srcContext - Source context
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuMemcpyDtoD, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync,
- * ::cuMemcpy3DPeerAsync,
- * ::cudaMemcpyPeer
- */
- CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
- /**
- * \brief Copies memory from Host to Device
- *
- * Copies from host memory to device memory. \p dstDevice and \p srcHost are
- * the base addresses of the destination and source, respectively. \p ByteCount
- * specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param srcHost - Source host pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy,
- * ::cudaMemcpyToSymbol
- */
- CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
- /**
- * \brief Copies memory from Device to Host
- *
- * Copies from device to host memory. \p dstHost and \p srcDevice specify the
- * base pointers of the destination and source, respectively. \p ByteCount
- * specifies the number of bytes to copy.
- *
- * \param dstHost - Destination host pointer
- * \param srcDevice - Source device pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy,
- * ::cudaMemcpyFromSymbol
- */
- CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
- /**
- * \brief Copies memory from Device to Device
- *
- * Copies from device memory to device memory. \p dstDevice and \p srcDevice
- * are the base pointers of the destination and source, respectively.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param srcDevice - Source device pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy,
- * ::cudaMemcpyToSymbol,
- * ::cudaMemcpyFromSymbol
- */
- CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
- /**
- * \brief Copies memory from Device to Array
- *
- * Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset
- * specify the CUDA array handle and starting offset in bytes of the destination data.
- * \p srcDevice specifies the base pointer of the source. \p ByteCount
- * specifies the number of bytes to copy.
- *
- * \param dstArray - Destination array
- * \param dstOffset - Offset in bytes of destination array
- * \param srcDevice - Source device pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpyToArray
- */
- CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
- /**
- * \brief Copies memory from Array to Device
- *
- * Copies from one 1D CUDA array to device memory. \p dstDevice specifies the
- * base pointer of the destination and must be naturally aligned with the CUDA
- * array elements. \p srcArray and \p srcOffset specify the CUDA array handle
- * and the offset in bytes into the array where the copy is to begin.
- * \p ByteCount specifies the number of bytes to copy and must be evenly
- * divisible by the array element size.
- *
- * \param dstDevice - Destination device pointer
- * \param srcArray - Source array
- * \param srcOffset - Offset in bytes of source array
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpyFromArray
- */
- CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- /**
- * \brief Copies memory from Host to Array
- *
- * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
- * specify the CUDA array handle and starting offset in bytes of the destination
- * data. \p srcHost specifies the base address of the source. \p ByteCount specifies
- * the number of bytes to copy.
- *
- * \param dstArray - Destination array
- * \param dstOffset - Offset in bytes of destination array
- * \param srcHost - Source host pointer
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpyToArray
- */
- CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
- /**
- * \brief Copies memory from Array to Host
- *
- * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
- * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
- * array handle and starting offset in bytes of the source data.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstHost - Destination host pointer
- * \param srcArray - Source array
- * \param srcOffset - Offset in bytes of source array
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpyFromArray
- */
- CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- /**
- * \brief Copies memory from Array to Array
- *
- * Copies from one 1D CUDA array to another. \p dstArray and \p srcArray
- * specify the handles of the destination and source CUDA arrays for the copy,
- * respectively. \p dstOffset and \p srcOffset specify the destination and
- * source offsets in bytes into the CUDA arrays. \p ByteCount is the number of
- * bytes to be copied. The elements in the CUDA arrays need not have
- * the same format, but the elements must be the same size, and \p ByteCount must be
- * evenly divisible by that size.
- *
- * \param dstArray - Destination array
- * \param dstOffset - Offset in bytes of destination array
- * \param srcArray - Source array
- * \param srcOffset - Offset in bytes of source array
- * \param ByteCount - Size of memory copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpyArrayToArray
- */
- CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- /**
- * \brief Copies memory for 2D arrays
- *
- * Perform a 2D memory copy according to the parameters specified in \p pCopy.
- * The ::CUDA_MEMCPY2D structure is defined as:
- *
- * \code
- typedef struct CUDA_MEMCPY2D_st {
- unsigned int srcXInBytes, srcY;
- CUmemorytype srcMemoryType;
- const void *srcHost;
- CUdeviceptr srcDevice;
- CUarray srcArray;
- unsigned int srcPitch;
- unsigned int dstXInBytes, dstY;
- CUmemorytype dstMemoryType;
- void *dstHost;
- CUdeviceptr dstDevice;
- CUarray dstArray;
- unsigned int dstPitch;
- unsigned int WidthInBytes;
- unsigned int Height;
- } CUDA_MEMCPY2D;
- * \endcode
- * where:
- * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
- * source and destination, respectively; ::CUmemorytype_enum is defined as:
- *
- * \code
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01,
- CU_MEMORYTYPE_DEVICE = 0x02,
- CU_MEMORYTYPE_ARRAY = 0x03,
- CU_MEMORYTYPE_UNIFIED = 0x04
- } CUmemorytype;
- * \endcode
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch
- * specify the (unified virtual address space) base address of the source data
- * and the bytes per row to apply. ::srcArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
- * specify the (host) base address of the source data and the bytes per row to
- * apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
- * specify the (device) base address of the source data and the bytes per row
- * to apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
- * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
- * ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
- * specify the (host) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch
- * specify the (unified virtual address space) base address of the destination
- * data and the bytes per row to apply. ::dstArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
- * specify the (device) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
- * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
- * ignored.
- *
- * - ::srcXInBytes and ::srcY specify the base address of the source data for
- * the copy.
- *
- * \par
- * For host pointers, the starting address is
- * \code
- void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::dstXInBytes and ::dstY specify the base address of the destination data
- * for the copy.
- *
- * \par
- * For host pointers, the base address is
- * \code
- void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
- * the 2D copy being performed.
- * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
- * ::srcXInBytes, and ::dstPitch must be greater than or equal to
- * ::WidthInBytes + ::dstXInBytes.
- *
- * \par
- * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
- * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
- * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
- * (device to device, CUDA array to device, CUDA array to CUDA array),
- * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
- * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
- * significantly slower in the cases where ::cuMemcpy2D() would have returned
- * an error code.
- *
- * \param pCopy - Parameters for the memory copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy2D,
- * ::cudaMemcpy2DToArray,
- * ::cudaMemcpy2DFromArray
- */
- CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
- /**
- * \brief Copies memory for 2D arrays
- *
- * Perform a 2D memory copy according to the parameters specified in \p pCopy.
- * The ::CUDA_MEMCPY2D structure is defined as:
- *
- * \code
- typedef struct CUDA_MEMCPY2D_st {
- unsigned int srcXInBytes, srcY;
- CUmemorytype srcMemoryType;
- const void *srcHost;
- CUdeviceptr srcDevice;
- CUarray srcArray;
- unsigned int srcPitch;
- unsigned int dstXInBytes, dstY;
- CUmemorytype dstMemoryType;
- void *dstHost;
- CUdeviceptr dstDevice;
- CUarray dstArray;
- unsigned int dstPitch;
- unsigned int WidthInBytes;
- unsigned int Height;
- } CUDA_MEMCPY2D;
- * \endcode
- * where:
- * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
- * source and destination, respectively; ::CUmemorytype_enum is defined as:
- *
- * \code
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01,
- CU_MEMORYTYPE_DEVICE = 0x02,
- CU_MEMORYTYPE_ARRAY = 0x03,
- CU_MEMORYTYPE_UNIFIED = 0x04
- } CUmemorytype;
- * \endcode
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch
- * specify the (unified virtual address space) base address of the source data
- * and the bytes per row to apply. ::srcArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
- * specify the (host) base address of the source data and the bytes per row to
- * apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
- * specify the (device) base address of the source data and the bytes per row
- * to apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
- * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
- * ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch
- * specify the (unified virtual address space) base address of the destination
- * data and the bytes per row to apply. ::dstArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
- * specify the (host) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
- * specify the (device) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
- * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
- * ignored.
- *
- * - ::srcXInBytes and ::srcY specify the base address of the source data for
- * the copy.
- *
- * \par
- * For host pointers, the starting address is
- * \code
- void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::dstXInBytes and ::dstY specify the base address of the destination data
- * for the copy.
- *
- * \par
- * For host pointers, the base address is
- * \code
- void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
- * the 2D copy being performed.
- * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
- * ::srcXInBytes, and ::dstPitch must be greater than or equal to
- * ::WidthInBytes + ::dstXInBytes.
- *
- * \par
- * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
- * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
- * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
- * (device to device, CUDA array to device, CUDA array to CUDA array),
- * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
- * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
- * significantly slower in the cases where ::cuMemcpy2D() would have returned
- * an error code.
- *
- * \param pCopy - Parameters for the memory copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy2D,
- * ::cudaMemcpy2DToArray,
- * ::cudaMemcpy2DFromArray
- */
- CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
- /**
- * \brief Copies memory for 3D arrays
- *
- * Perform a 3D memory copy according to the parameters specified in
- * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
- *
- * \code
- typedef struct CUDA_MEMCPY3D_st {
- unsigned int srcXInBytes, srcY, srcZ;
- unsigned int srcLOD;
- CUmemorytype srcMemoryType;
- const void *srcHost;
- CUdeviceptr srcDevice;
- CUarray srcArray;
- unsigned int srcPitch; // ignored when src is array
- unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
- unsigned int dstXInBytes, dstY, dstZ;
- unsigned int dstLOD;
- CUmemorytype dstMemoryType;
- void *dstHost;
- CUdeviceptr dstDevice;
- CUarray dstArray;
- unsigned int dstPitch; // ignored when dst is array
- unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
- unsigned int WidthInBytes;
- unsigned int Height;
- unsigned int Depth;
- } CUDA_MEMCPY3D;
- * \endcode
- * where:
- * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
- * source and destination, respectively; ::CUmemorytype_enum is defined as:
- *
- * \code
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01,
- CU_MEMORYTYPE_DEVICE = 0x02,
- CU_MEMORYTYPE_ARRAY = 0x03,
- CU_MEMORYTYPE_UNIFIED = 0x04
- } CUmemorytype;
- * \endcode
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch
- * specify the (unified virtual address space) base address of the source data
- * and the bytes per row to apply. ::srcArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
- * ::srcHeight specify the (host) base address of the source data, the bytes
- * per row, and the height of each 2D slice of the 3D array. ::srcArray is
- * ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
- * ::srcHeight specify the (device) base address of the source data, the bytes
- * per row, and the height of each 2D slice of the 3D array. ::srcArray is
- * ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
- * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
- * ::srcHeight are ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch
- * specify the (unified virtual address space) base address of the destination
- * data and the bytes per row to apply. ::dstArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
- * ::dstHeight specify the (host) base address of the destination data, the
- * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray is
- * ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
- * ::dstHeight specify the (device) base address of the destination data, the
- * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray is
- * ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
- * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
- * ::dstHeight are ignored.
- *
- * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
- * data for the copy.
- *
- * \par
- * For host pointers, the starting address is
- * \code
- void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
- * destination data for the copy.
- *
- * \par
- * For host pointers, the base address is
- * \code
- void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
- * and depth of the 3D copy being performed.
- * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
- * ::srcXInBytes, and ::dstPitch must be greater than or equal to
- * ::WidthInBytes + ::dstXInBytes.
- * - If specified, ::srcHeight must be greater than or equal to ::Height +
- * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
- *
- * \par
- * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum
- * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
- *
- * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
- * set to 0.
- *
- * \param pCopy - Parameters for the memory copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMemcpy3D
- */
- CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
- /**
- * \brief Copies memory between contexts
- *
- * Perform a 3D memory copy according to the parameters specified in
- * \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure
- * for documentation of its parameters.
- *
- * \param pCopy - Parameters for the memory copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync,
- * ::cuMemcpy3DPeerAsync,
- * ::cudaMemcpy3DPeer
- */
- CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy);
- /**
- * \brief Copies memory asynchronously
- *
- * Copies data between two pointers.
- * \p dst and \p src are base pointers of the destination and source, respectively.
- * \p ByteCount specifies the number of bytes to copy.
- * Note that this function infers the type of the transfer (host to host, host to
- * device, device to device, or device to host) from the pointer values. This
- * function is only allowed in contexts which support unified addressing.
- *
- * \param dst - Destination unified virtual address space pointer
- * \param src - Source unified virtual address space pointer
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyAsync,
- * ::cudaMemcpyToSymbolAsync,
- * ::cudaMemcpyFromSymbolAsync
- */
- CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies device memory between two contexts asynchronously.
- *
- * Copies from device memory in one context to device memory in another
- * context. \p dstDevice is the base device pointer of the destination memory
- * and \p dstContext is the destination context. \p srcDevice is the base
- * device pointer of the source memory and \p srcContext is the source context.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param dstContext - Destination context
- * \param srcDevice - Source device pointer
- * \param srcContext - Source context
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync,
- * ::cuMemcpy3DPeerAsync,
- * ::cudaMemcpyPeerAsync
- */
- CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory from Host to Device
- *
- * Copies from host memory to device memory. \p dstDevice and \p srcHost are
- * the base addresses of the destination and source, respectively. \p ByteCount
- * specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param srcHost - Source host pointer
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyAsync,
- * ::cudaMemcpyToSymbolAsync
- */
- CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory from Device to Host
- *
- * Copies from device to host memory. \p dstHost and \p srcDevice specify the
- * base pointers of the destination and source, respectively. \p ByteCount
- * specifies the number of bytes to copy.
- *
- * \param dstHost - Destination host pointer
- * \param srcDevice - Source device pointer
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyAsync,
- * ::cudaMemcpyFromSymbolAsync
- */
- CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory from Device to Device
- *
- * Copies from device memory to device memory. \p dstDevice and \p srcDevice
- * are the base pointers of the destination and source, respectively.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstDevice - Destination device pointer
- * \param srcDevice - Source device pointer
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyAsync,
- * ::cudaMemcpyToSymbolAsync,
- * ::cudaMemcpyFromSymbolAsync
- */
- CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory from Host to Array
- *
- * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
- * specify the CUDA array handle and starting offset in bytes of the
- * destination data. \p srcHost specifies the base address of the source.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstArray - Destination array
- * \param dstOffset - Offset in bytes of destination array
- * \param srcHost - Source host pointer
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyToArrayAsync
- */
- CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory from Array to Host
- *
- * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
- * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
- * array handle and starting offset in bytes of the source data.
- * \p ByteCount specifies the number of bytes to copy.
- *
- * \param dstHost - Destination host pointer
- * \param srcArray - Source array
- * \param srcOffset - Offset in bytes of source array
- * \param ByteCount - Size of memory copy in bytes
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- * \note_memcpy
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpyFromArrayAsync
- */
- CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
- /**
- * \brief Copies memory for 2D arrays
- *
- * Perform a 2D memory copy according to the parameters specified in \p pCopy.
- * The ::CUDA_MEMCPY2D structure is defined as:
- *
- * \code
- typedef struct CUDA_MEMCPY2D_st {
- unsigned int srcXInBytes, srcY;
- CUmemorytype srcMemoryType;
- const void *srcHost;
- CUdeviceptr srcDevice;
- CUarray srcArray;
- unsigned int srcPitch;
- unsigned int dstXInBytes, dstY;
- CUmemorytype dstMemoryType;
- void *dstHost;
- CUdeviceptr dstDevice;
- CUarray dstArray;
- unsigned int dstPitch;
- unsigned int WidthInBytes;
- unsigned int Height;
- } CUDA_MEMCPY2D;
- * \endcode
- * where:
- * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
- * source and destination, respectively; ::CUmemorytype_enum is defined as:
- *
- * \code
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01,
- CU_MEMORYTYPE_DEVICE = 0x02,
- CU_MEMORYTYPE_ARRAY = 0x03,
- CU_MEMORYTYPE_UNIFIED = 0x04
- } CUmemorytype;
- * \endcode
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
- * specify the (host) base address of the source data and the bytes per row to
- * apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch
- * specify the (unified virtual address space) base address of the source data
- * and the bytes per row to apply. ::srcArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
- * specify the (device) base address of the source data and the bytes per row
- * to apply. ::srcArray is ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
- * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
- * ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch
- * specify the (unified virtual address space) base address of the destination data
- * and the bytes per row to apply. ::dstArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
- * specify the (host) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
- * specify the (device) base address of the destination data and the bytes per
- * row to apply. ::dstArray is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
- * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
- * ignored.
- *
- * - ::srcXInBytes and ::srcY specify the base address of the source data for
- * the copy.
- *
- * \par
- * For host pointers, the starting address is
- * \code
- void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::dstXInBytes and ::dstY specify the base address of the destination data
- * for the copy.
- *
- * \par
- * For host pointers, the base address is
- * \code
- void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
- * the 2D copy being performed.
- * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
- * ::srcXInBytes, and ::dstPitch must be greater than or equal to
- * ::WidthInBytes + ::dstXInBytes.
- * - If specified, ::srcHeight must be greater than or equal to ::Height +
- * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
- *
- * \par
- * ::cuMemcpy2DAsync() returns an error if any pitch is greater than the maximum
- * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
- * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
- * (device to device, CUDA array to device, CUDA array to CUDA array),
- * ::cuMemcpy2DAsync() may fail for pitches not computed by ::cuMemAllocPitch().
- *
- * \param pCopy - Parameters for the memory copy
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpy2DAsync,
- * ::cudaMemcpy2DToArrayAsync,
- * ::cudaMemcpy2DFromArrayAsync
- */
- CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
- /**
- * \brief Copies memory for 3D arrays
- *
- * Perform a 3D memory copy according to the parameters specified in
- * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
- *
- * \code
- typedef struct CUDA_MEMCPY3D_st {
- unsigned int srcXInBytes, srcY, srcZ;
- unsigned int srcLOD;
- CUmemorytype srcMemoryType;
- const void *srcHost;
- CUdeviceptr srcDevice;
- CUarray srcArray;
- unsigned int srcPitch; // ignored when src is array
- unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
- unsigned int dstXInBytes, dstY, dstZ;
- unsigned int dstLOD;
- CUmemorytype dstMemoryType;
- void *dstHost;
- CUdeviceptr dstDevice;
- CUarray dstArray;
- unsigned int dstPitch; // ignored when dst is array
- unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
- unsigned int WidthInBytes;
- unsigned int Height;
- unsigned int Depth;
- } CUDA_MEMCPY3D;
- * \endcode
- * where:
- * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
- * source and destination, respectively; ::CUmemorytype_enum is defined as:
- *
- * \code
- typedef enum CUmemorytype_enum {
- CU_MEMORYTYPE_HOST = 0x01,
- CU_MEMORYTYPE_DEVICE = 0x02,
- CU_MEMORYTYPE_ARRAY = 0x03,
- CU_MEMORYTYPE_UNIFIED = 0x04
- } CUmemorytype;
- * \endcode
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch
- * specify the (unified virtual address space) base address of the source data
- * and the bytes per row to apply. ::srcArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
- * ::srcHeight specify the (host) base address of the source data, the bytes
- * per row, and the height of each 2D slice of the 3D array. ::srcArray is
- * ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
- * ::srcHeight specify the (device) base address of the source data, the bytes
- * per row, and the height of each 2D slice of the 3D array. ::srcArray is
- * ignored.
- *
- * \par
- * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
- * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
- * ::srcHeight are ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch
- * specify the (unified virtual address space) base address of the destination data
- * and the bytes per row to apply. ::dstArray is ignored.
- * This value may be used only if unified addressing is supported in the calling
- * context.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
- * ::dstHeight specify the (host) base address of the destination data, the
- * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
- * is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
- * ::dstHeight specify the (device) base address of the destination data, the
- * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
- * is ignored.
- *
- * \par
- * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
- * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
- * ::dstHeight are ignored.
- *
- * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
- * data for the copy.
- *
- * \par
- * For host pointers, the starting address is
- * \code
- void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
- * destination data for the copy.
- *
- * \par
- * For host pointers, the base address is
- * \code
- void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
- * \endcode
- *
- * \par
- * For device pointers, the starting address is
- * \code
- CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
- * \endcode
- *
- * \par
- * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
- * element size.
- *
- * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
- * and depth of the 3D copy being performed.
- * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
- * ::srcXInBytes, and ::dstPitch must be greater than or equal to
- * ::WidthInBytes + ::dstXInBytes.
- * - If specified, ::srcHeight must be greater than or equal to ::Height +
- * ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
- *
- * \par
- * ::cuMemcpy3DAsync() returns an error if any pitch is greater than the maximum
- * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
- *
- * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
- * set to 0.
- *
- * \param pCopy - Parameters for the memory copy
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemcpy3DAsync
- */
- CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
- /**
- * \brief Copies memory between contexts asynchronously.
- *
- * Perform a 3D memory copy according to the parameters specified in
- * \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure
- * for documentation of its parameters.
- *
- * \param pCopy - Parameters for the memory copy
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync,
- * ::cuMemcpy3DPeer,
- * ::cudaMemcpy3DPeerAsync
- */
- CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream);
- /**
- * \brief Performs a batch of memory copies asynchronously.
- *
- * Performs a batch of memory copies. The batch as a whole executes in stream order but copies within a
- * batch are not guaranteed to execute in any specific order. This API only supports pointer-to-pointer copies.
- * For copies involving CUDA arrays, please see ::cuMemcpy3DBatchAsync.
- *
- * Performs memory copies from source buffers specified in \p srcs to destination buffers specified in \p dsts.
- * The size of each copy is specified in \p sizes. All three arrays must be of the same length as specified
- * by \p count. Since there are no ordering guarantees for copies within a batch, specifying any dependent copies
- * within a batch will result in undefined behavior.
- *
- * Every copy in the batch has to be associated with a set of attributes specified in the \p attrs array.
- * Each entry in this array can apply to more than one copy. This can be done by specifying in the \p attrsIdxs array,
- * the index of the first copy that the corresponding entry in the \p attrs array applies to. Both \p attrs and
- * \p attrsIdxs must be of the same length as specified by \p numAttrs. For example, if a batch has 10 copies listed
- * in dst/src/sizes, the first 6 of which have one set of attributes and the remaining 4 another, then \p numAttrs
- * will be 2, \p attrsIdxs will be {0, 6} and \p attrs will contain the two sets of attributes. Note that the first entry
- * in \p attrsIdxs must always be 0. Also, each entry must be greater than the previous entry and the last entry should be
- * less than \p count. Furthermore, \p numAttrs must be less than or equal to \p count.
- *
- * The ::CUmemcpyAttributes::srcAccessOrder indicates the source access ordering to be observed for copies associated
- * with the attribute. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_STREAM, then the source will
- * be accessed in stream order. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_DURING_API_CALL then
- * it indicates that access to the source pointer can be out of stream order and all accesses must be complete before
- * the API call returns. This flag is suited for ephemeral sources (ex., stack variables) when it's known that no prior
- * operations in the stream can be accessing the memory and also that the lifetime of the memory is limited to the scope
- * that the source variable was declared in. Specifying this flag allows the driver to optimize the copy and removes the
- * need for the user to synchronize the stream after the API call. If the source access order is set to
- * ::CU_MEMCPY_SRC_ACCESS_ORDER_ANY then it indicates that access to the source pointer can be out of stream order and the
- * accesses can happen even after the API call returns. This flag is suited for host pointers allocated
- * outside CUDA (ex., via malloc) when it's known that no prior operations in the stream can be accessing the memory.
- * Specifying this flag allows the driver to optimize the copy on certain platforms. Each memcpy operation in the batch must
- * have a valid ::CUmemcpyAttributes corresponding to it including the appropriate srcAccessOrder setting, otherwise the API
- * will return ::CUDA_ERROR_INVALID_VALUE.
- *
- * The ::CUmemcpyAttributes::srcLocHint and ::CUmemcpyAttributes::dstLocHint allow applications to specify hint locations
- * for operands of a copy when the operand doesn't have a fixed location. That is, these hints are
- * only applicable for managed memory pointers on devices where ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS is true or
- * system-allocated pageable memory on devices where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS is true.
- * For other cases, these hints are ignored.
- *
- * The ::CUmemcpyAttributes::flags field can be used to specify certain flags for copies. Setting the
- * ::CU_MEMCPY_FLAG_PREFER_OVERLAP_WITH_COMPUTE flag indicates that the associated copies should preferably overlap with
- * any compute work. Note that this flag is a hint and can be ignored depending on the platform and other parameters of the copy.
- *
- * If any error is encountered while parsing the batch, the index within the batch where the error was encountered
- * will be returned in \p failIdx.
- *
- * \param dsts - Array of destination pointers.
- * \param srcs - Array of memcpy source pointers.
- * \param sizes - Array of sizes for memcpy operations.
- * \param count - Size of \p dsts, \p srcs and \p sizes arrays
- * \param attrs - Array of memcpy attributes.
- * \param attrsIdxs - Array of indices to specify which copies each entry in the \p attrs array applies to.
- The attributes specified in attrs[k] will be applied to copies starting from attrsIdxs[k]
- through attrsIdxs[k+1] - 1. Also attrs[numAttrs-1] will apply to copies starting from
- attrsIdxs[numAttrs-1] through count - 1.
- * \param numAttrs - Size of \p attrs and \p attrsIdxs arrays.
- * \param failIdx - Pointer to a location to return the index of the copy where a failure was encountered.
- The value will be SIZE_MAX if the error doesn't pertain to any specific copy.
- * \param hStream - The stream to enqueue the operations in. Must not be legacy NULL stream.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_async
- * \note_memcpy
- */
- CUresult CUDAAPI cuMemcpyBatchAsync(CUdeviceptr *dsts, CUdeviceptr *srcs, size_t *sizes, size_t count,
- CUmemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
- size_t *failIdx, CUstream hStream);
- /**
- * \brief Performs a batch of 3D memory copies asynchronously.
- *
- * Performs a batch of memory copies. The batch as a whole executes in stream order but copies within a
- * batch are not guaranteed to execute in any specific order. Note that this means specifying any dependent
- * copies within a batch will result in undefined behavior.
- *
- * Performs memory copies as specified in the \p opList array. The length of this array is specified in \p numOps.
- * Each entry in this array describes a copy operation. This includes among other things, the source and destination
- * operands for the copy as specified in ::CUDA_MEMCPY3D_BATCH_OP::src and ::CUDA_MEMCPY3D_BATCH_OP::dst respectively.
- * The source and destination operands of a copy can either be a pointer or a CUDA array. The width, height and depth
- * of a copy are specified in ::CUDA_MEMCPY3D_BATCH_OP::extent. The width, height and depth of a copy are specified in
- * elements and must not be zero. For pointer-to-pointer copies, the element size is considered to be 1. For pointer
- * to CUDA array or vice versa copies, the element size is determined by the CUDA array. For CUDA array to CUDA array copies,
- * the element size of the two CUDA arrays must match.
- *
- * For a given operand, if ::CUmemcpy3DOperand::type is specified as ::CU_MEMCPY_OPERAND_TYPE_POINTER, then
- * ::CUmemcpy3DOperand::op::ptr will be used. The ::CUmemcpy3DOperand::op::ptr::ptr field must contain the pointer where
- * the copy should begin. The ::CUmemcpy3DOperand::op::ptr::rowLength field specifies the length of each row in elements and
- * must either be zero or be greater than or equal to the width of the copy specified in ::CUDA_MEMCPY3D_BATCH_OP::extent::width.
- * The ::CUmemcpy3DOperand::op::ptr::layerHeight field specifies the height of each layer and must either be zero or be greater than
- * or equal to the height of the copy specified in ::CUDA_MEMCPY3D_BATCH_OP::extent::height. When either of these values is zero,
- * that aspect of the operand is considered to be tightly packed according to the copy extent. For managed memory pointers on devices where
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS is true or system-allocated pageable memory on devices where
- * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS is true, the ::CUmemcpy3DOperand::op::ptr::locHint field can be used to hint
- * the location of the operand.
- *
- * If an operand's type is specified as ::CU_MEMCPY_OPERAND_TYPE_ARRAY, then ::CUmemcpy3DOperand::op::array will be used.
- * The ::CUmemcpy3DOperand::op::array::array field specifies the CUDA array and ::CUmemcpy3DOperand::op::array::offset specifies
- * the 3D offset into that array where the copy begins.
- *
- * The ::CUmemcpyAttributes::srcAccessOrder indicates the source access ordering to be observed for copies associated
- * with the attribute. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_STREAM, then the source will
- * be accessed in stream order. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_DURING_API_CALL then
- * it indicates that access to the source pointer can be out of stream order and all accesses must be complete before
- * the API call returns. This flag is suited for ephemeral sources (ex., stack variables) when it's known that no prior
- * operations in the stream can be accessing the memory and also that the lifetime of the memory is limited to the scope
- * that the source variable was declared in. Specifying this flag allows the driver to optimize the copy and removes the
- * need for the user to synchronize the stream after the API call. If the source access order is set to
- * ::CU_MEMCPY_SRC_ACCESS_ORDER_ANY then it indicates that access to the source pointer can be out of stream order and the
- * accesses can happen even after the API call returns. This flag is suited for host pointers allocated
- * outside CUDA (ex., via malloc) when it's known that no prior operations in the stream can be accessing the memory.
- * Specifying this flag allows the driver to optimize the copy on certain platforms. Each memcpy operation in \p opList must
- * have a valid srcAccessOrder setting, otherwise this API will return ::CUDA_ERROR_INVALID_VALUE.
- *
- * The ::CUmemcpyAttributes::flags field can be used to specify certain flags for copies. Setting the
- * ::CU_MEMCPY_FLAG_PREFER_OVERLAP_WITH_COMPUTE flag indicates that the associated copies should preferably overlap with
- * any compute work. Note that this flag is a hint and can be ignored depending on the platform and other parameters of the copy.
- *
- * If any error is encountered while parsing the batch, the index within the batch where the error was encountered
- * will be returned in \p failIdx.
- *
- * \param numOps - Total number of memcpy operations.
- * \param opList - Array of size \p numOps containing the actual memcpy operations.
- * \param failIdx - Pointer to a location to return the index of the copy where a failure was encountered.
- * The value will be SIZE_MAX if the error doesn't pertain to any specific copy.
- * \param flags - Flags for future use, must be zero now.
- * \param hStream - The stream to enqueue the operations in. Must not be default NULL stream.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_async
- * \note_memcpy
- */
- CUresult CUDAAPI cuMemcpy3DBatchAsync(size_t numOps, CUDA_MEMCPY3D_BATCH_OP *opList,
- size_t *failIdx, unsigned long long flags, CUstream hStream);
- /**
- * \brief Initializes device memory
- *
- * Sets the memory range of \p N 8-bit values to the specified value
- * \p uc.
- *
- * \param dstDevice - Destination device pointer
- * \param uc - Value to set
- * \param N - Number of elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset
- */
- CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
- /**
- * \brief Initializes device memory
- *
- * Sets the memory range of \p N 16-bit values to the specified value
- * \p us. The \p dstDevice pointer must be two byte aligned.
- *
- * \param dstDevice - Destination device pointer
- * \param us - Value to set
- * \param N - Number of elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset
- */
- CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
- /**
- * \brief Initializes device memory
- *
- * Sets the memory range of \p N 32-bit values to the specified value
- * \p ui. The \p dstDevice pointer must be four byte aligned.
- *
- * \param dstDevice - Destination device pointer
- * \param ui - Value to set
- * \param N - Number of elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32Async,
- * ::cudaMemset
- */
- CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
- /**
- * \brief Initializes device memory
- *
- * Sets the 2D memory range of \p Width 8-bit values to the specified value
- * \p uc. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (Unused if \p Height is 1)
- * \param uc - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2D
- */
- CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
- /**
- * \brief Initializes device memory
- *
- * Sets the 2D memory range of \p Width 16-bit values to the specified value
- * \p us. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. The \p dstDevice pointer
- * and \p dstPitch offset must be two byte aligned. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (Unused if \p Height is 1)
- * \param us - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2D
- */
- CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
- /**
- * \brief Initializes device memory
- *
- * Sets the 2D memory range of \p Width 32-bit values to the specified value
- * \p ui. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. The \p dstDevice pointer
- * and \p dstPitch offset must be four byte aligned. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (unused if \p Height is 1)
- * \param ui - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2D
- */
- CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
- /**
- * \brief Sets device memory
- *
- * Sets the memory range of \p N 8-bit values to the specified value
- * \p uc.
- *
- * \param dstDevice - Destination device pointer
- * \param uc - Value to set
- * \param N - Number of elements
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemsetAsync
- */
- CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
- /**
- * \brief Sets device memory
- *
- * Sets the memory range of \p N 16-bit values to the specified value
- * \p us. The \p dstDevice pointer must be two byte aligned.
- *
- * \param dstDevice - Destination device pointer
- * \param us - Value to set
- * \param N - Number of elements
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemsetAsync
- */
- CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
- /**
- * \brief Sets device memory
- *
- * Sets the memory range of \p N 32-bit values to the specified value
- * \p ui. The \p dstDevice pointer must be four byte aligned.
- *
- * \param dstDevice - Destination device pointer
- * \param ui - Value to set
- * \param N - Number of elements
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cuMemsetD32,
- * ::cudaMemsetAsync
- */
- CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
- /**
- * \brief Sets device memory
- *
- * Sets the 2D memory range of \p Width 8-bit values to the specified value
- * \p uc. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (unused if \p Height is 1)
- * \param uc - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2DAsync
- */
- CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
- /**
- * \brief Sets device memory
- *
- * Sets the 2D memory range of \p Width 16-bit values to the specified value
- * \p us. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. The \p dstDevice pointer
- * and \p dstPitch offset must be two byte aligned. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (unused if \p Height is 1)
- * \param us - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2DAsync
- */
- CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
- /**
- * \brief Sets device memory
- *
- * Sets the 2D memory range of \p Width 32-bit values to the specified value
- * \p ui. \p Height specifies the number of rows to set, and \p dstPitch
- * specifies the number of bytes between each row. The \p dstDevice pointer
- * and \p dstPitch offset must be four byte aligned. This function performs
- * fastest when the pitch is one that has been passed back by
- * ::cuMemAllocPitch().
- *
- * \param dstDevice - Destination device pointer
- * \param dstPitch - Pitch of destination device pointer (unused if \p Height is 1)
- * \param ui - Value to set
- * \param Width - Width of row
- * \param Height - Number of rows
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- * \note_memset
- * \note_null_stream
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
- * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32,
- * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
- * ::cuMemsetD32, ::cuMemsetD32Async,
- * ::cudaMemset2DAsync
- */
- CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
- /**
- * \brief Creates a 1D or 2D CUDA array
- *
- * Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure
- * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
- * The ::CUDA_ARRAY_DESCRIPTOR is defined as:
- *
- * \code
- typedef struct {
- unsigned int Width;
- unsigned int Height;
- CUarray_format Format;
- unsigned int NumChannels;
- } CUDA_ARRAY_DESCRIPTOR;
- * \endcode
- * where:
- *
- * - \p Width and \p Height are the width and height of the CUDA array (in
- * elements); the CUDA array is one-dimensional if \p Height is 0, two-dimensional
- * otherwise;
- * - \p Format specifies the format of the elements; ::CUarray_format is
- * defined as:
- * \code
- typedef enum CUarray_format_enum {
- CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
- CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
- CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
- CU_AD_FORMAT_SIGNED_INT8 = 0x08,
- CU_AD_FORMAT_SIGNED_INT16 = 0x09,
- CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
- CU_AD_FORMAT_HALF = 0x10,
- CU_AD_FORMAT_FLOAT = 0x20,
- CU_AD_FORMAT_NV12 = 0xb0,
- CU_AD_FORMAT_UNORM_INT8X1 = 0xc0,
- CU_AD_FORMAT_UNORM_INT8X2 = 0xc1,
- CU_AD_FORMAT_UNORM_INT8X4 = 0xc2,
- CU_AD_FORMAT_UNORM_INT16X1 = 0xc3,
- CU_AD_FORMAT_UNORM_INT16X2 = 0xc4,
- CU_AD_FORMAT_UNORM_INT16X4 = 0xc5,
- CU_AD_FORMAT_SNORM_INT8X1 = 0xc6,
- CU_AD_FORMAT_SNORM_INT8X2 = 0xc7,
- CU_AD_FORMAT_SNORM_INT8X4 = 0xc8,
- CU_AD_FORMAT_SNORM_INT16X1 = 0xc9,
- CU_AD_FORMAT_SNORM_INT16X2 = 0xca,
- CU_AD_FORMAT_SNORM_INT16X4 = 0xcb,
- CU_AD_FORMAT_BC1_UNORM = 0x91,
- CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92,
- CU_AD_FORMAT_BC2_UNORM = 0x93,
- CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94,
- CU_AD_FORMAT_BC3_UNORM = 0x95,
- CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96,
- CU_AD_FORMAT_BC4_UNORM = 0x97,
- CU_AD_FORMAT_BC4_SNORM = 0x98,
- CU_AD_FORMAT_BC5_UNORM = 0x99,
- CU_AD_FORMAT_BC5_SNORM = 0x9a,
- CU_AD_FORMAT_BC6H_UF16 = 0x9b,
- CU_AD_FORMAT_BC6H_SF16 = 0x9c,
- CU_AD_FORMAT_BC7_UNORM = 0x9d,
- CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e,
- CU_AD_FORMAT_P010 = 0x9f,
- CU_AD_FORMAT_P016 = 0xa1,
- CU_AD_FORMAT_NV16 = 0xa2,
- CU_AD_FORMAT_P210 = 0xa3,
- CU_AD_FORMAT_P216 = 0xa4,
- CU_AD_FORMAT_YUY2 = 0xa5,
- CU_AD_FORMAT_Y210 = 0xa6,
- CU_AD_FORMAT_Y216 = 0xa7,
- CU_AD_FORMAT_AYUV = 0xa8,
- CU_AD_FORMAT_Y410 = 0xa9,
- CU_AD_FORMAT_Y416 = 0xb1,
- CU_AD_FORMAT_Y444_PLANAR8 = 0xb2,
- CU_AD_FORMAT_Y444_PLANAR10 = 0xb3,
- CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4,
- CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5,
- CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50,
- } CUarray_format;
- * \endcode
- * - \p NumChannels specifies the number of packed components per CUDA array
- * element; it may be 1, 2, or 4;
- *
- * Here are examples of CUDA array descriptions:
- *
- * Description for a CUDA array of 2048 floats:
- * \code
- CUDA_ARRAY_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_FLOAT;
- desc.NumChannels = 1;
- desc.Width = 2048;
- desc.Height = 1;
- * \endcode
- *
- * Description for a 64 x 64 CUDA array of floats:
- * \code
- CUDA_ARRAY_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_FLOAT;
- desc.NumChannels = 1;
- desc.Width = 64;
- desc.Height = 64;
- * \endcode
- *
- * Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit
- * float16's:
- * \code
- CUDA_ARRAY_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_HALF;
- desc.NumChannels = 4;
- desc.Width = width;
- desc.Height = height;
- * \endcode
- *
- * Description for a \p width x \p height CUDA array of 16-bit elements, each
- * of which is two 8-bit unsigned chars:
- * \code
- CUDA_ARRAY_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
- desc.NumChannels = 2;
- desc.Width = width;
- desc.Height = height;
- * \endcode
- *
- * \param pHandle - Returned array
- * \param pAllocateArray - Array descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMallocArray
- */
- CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
- /**
- * \brief Get a 1D or 2D CUDA array descriptor
- *
- * Returns in \p *pArrayDescriptor a descriptor containing information on the
- * format and dimensions of the CUDA array \p hArray. It is useful for
- * subroutines that have been passed a CUDA array, but need to know the CUDA
- * array parameters for validation or other purposes.
- *
- * \param pArrayDescriptor - Returned array descriptor
- * \param hArray - Array to get descriptor of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaArrayGetInfo
- */
- CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
- /**
- * \brief Returns the layout properties of a sparse CUDA array
- *
- * Returns the layout properties of a sparse CUDA array in \p sparseProperties.
- * If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_SPARSE,
- * ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * If the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL,
- * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize represents the total size of the array. Otherwise, it will be zero.
- * Also, the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is always zero.
- * Note that the \p array must have been allocated using ::cuArrayCreate or ::cuArray3DCreate. For CUDA arrays obtained
- * using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. Instead, ::cuMipmappedArrayGetSparseProperties
- * must be used to obtain the sparse properties of the entire CUDA mipmapped array to which \p array belongs.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \param[out] sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES
- * \param[in] array - CUDA array to get the sparse properties of
- * \sa ::cuMipmappedArrayGetSparseProperties, ::cuMemMapArrayAsync
- */
- CUresult CUDAAPI cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array);
- /**
- * \brief Returns the layout properties of a sparse CUDA mipmapped array
- *
- * Returns the sparse array layout properties in \p sparseProperties.
- * If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE,
- * ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the
- * size of the mip tail region. The mip tail region includes all mip levels whose width, height or depth
- * is less than that of the tile.
- * For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL,
- * then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined.
- * Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer.
- * The returned value of ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \param[out] sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES
- * \param[in] mipmap - CUDA mipmapped array to get the sparse properties of
- * \sa ::cuArrayGetSparseProperties, ::cuMemMapArrayAsync
- */
- CUresult CUDAAPI cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap);
- /**
- * \brief Returns the memory requirements of a CUDA array
- *
- * Returns the memory requirements of a CUDA array in \p memoryRequirements.
- * If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_DEFERRED_MAPPING,
- * ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::size
- * represents the total size of the CUDA array.
- * The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::alignment
- * represents the alignment necessary for mapping the CUDA array.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \param[out] memoryRequirements - Pointer to ::CUDA_ARRAY_MEMORY_REQUIREMENTS
- * \param[in] array - CUDA array to get the memory requirements of
- * \param[in] device - Device to get the memory requirements for
- * \sa ::cuMipmappedArrayGetMemoryRequirements, ::cuMemMapArrayAsync
- */
- CUresult CUDAAPI cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUarray array, CUdevice device);
-
- /**
- * \brief Returns the memory requirements of a CUDA mipmapped array
- *
- * Returns the memory requirements of a CUDA mipmapped array in \p memoryRequirements.
- * If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_DEFERRED_MAPPING,
- * ::CUDA_ERROR_INVALID_VALUE will be returned.
- *
- * The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::size
- * represents the total size of the CUDA mipmapped array.
- * The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::alignment
- * represents the alignment necessary for mapping the CUDA mipmapped
- * array.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \param[out] memoryRequirements - Pointer to ::CUDA_ARRAY_MEMORY_REQUIREMENTS
- * \param[in] mipmap - CUDA mipmapped array to get the memory requirements of
- * \param[in] device - Device to get the memory requirements for
- * \sa ::cuArrayGetMemoryRequirements, ::cuMemMapArrayAsync
- */
- CUresult CUDAAPI cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUmipmappedArray mipmap, CUdevice device);
- /**
- * \brief Gets a CUDA array plane from a CUDA array
- *
- * Returns in \p pPlaneArray a CUDA array that represents a single format plane
- * of the CUDA array \p hArray.
- *
- * If \p planeIdx is greater than the maximum number of planes in this array or if the array does
- * not have a multi-planar format (e.g. ::CU_AD_FORMAT_NV12), then ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * Note that if the \p hArray has format ::CU_AD_FORMAT_NV12, then passing in 0 for \p planeIdx returns
- * a CUDA array of the same size as \p hArray but with one channel and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
- * If 1 is passed for \p planeIdx, then the returned CUDA array has half the height and width
- * of \p hArray with two channels and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format.
- *
- * \param pPlaneArray - Returned CUDA array referenced by the \p planeIdx
- * \param hArray - Multiplanar CUDA array
- * \param planeIdx - Plane index
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::cuArrayCreate,
- * ::cudaArrayGetPlane
- */
- CUresult CUDAAPI cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, unsigned int planeIdx);
- /**
- * \brief Destroys a CUDA array
- *
- * Destroys the CUDA array \p hArray.
- *
- * \param hArray - Array to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_ARRAY_IS_MAPPED,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaFreeArray
- */
- CUresult CUDAAPI cuArrayDestroy(CUarray hArray);
- /**
- * \brief Creates a 3D CUDA array
- *
- * Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure
- * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
- * The ::CUDA_ARRAY3D_DESCRIPTOR is defined as:
- *
- * \code
- typedef struct {
- unsigned int Width;
- unsigned int Height;
- unsigned int Depth;
- CUarray_format Format;
- unsigned int NumChannels;
- unsigned int Flags;
- } CUDA_ARRAY3D_DESCRIPTOR;
- * \endcode
- * where:
- *
- * - \p Width, \p Height, and \p Depth are the width, height, and depth of the
- * CUDA array (in elements); the following types of CUDA arrays can be allocated:
- * - A 1D array is allocated if \p Height and \p Depth extents are both zero.
- * - A 2D array is allocated if only \p Depth extent is zero.
- * - A 3D array is allocated if all three extents are non-zero.
- * - A 1D layered CUDA array is allocated if only \p Height is zero and the
- * ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
- * of layers is determined by the depth extent.
- * - A 2D layered CUDA array is allocated if all three extents are non-zero and
- * the ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
- * of layers is determined by the depth extent.
- * - A cubemap CUDA array is allocated if all three extents are non-zero and the
- * ::CUDA_ARRAY3D_CUBEMAP flag is set. \p Width must be equal to \p Height, and
- * \p Depth must be six. A cubemap is a special type of 2D layered CUDA array,
- * where the six layers represent the six faces of a cube. The order of the six
- * layers in memory is the same as that listed in ::CUarray_cubemap_face.
- * - A cubemap layered CUDA array is allocated if all three extents are non-zero,
- * and both, ::CUDA_ARRAY3D_CUBEMAP and ::CUDA_ARRAY3D_LAYERED flags are set.
- * \p Width must be equal to \p Height, and \p Depth must be a multiple of six.
- * A cubemap layered CUDA array is a special type of 2D layered CUDA array that
- * consists of a collection of cubemaps. The first six layers represent the first
- * cubemap, the next six layers form the second cubemap, and so on.
- *
- * - ::Format specifies the format of the elements; ::CUarray_format is
- * defined as:
- * \code
- typedef enum CUarray_format_enum {
- CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
- CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
- CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
- CU_AD_FORMAT_SIGNED_INT8 = 0x08,
- CU_AD_FORMAT_SIGNED_INT16 = 0x09,
- CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
- CU_AD_FORMAT_HALF = 0x10,
- CU_AD_FORMAT_FLOAT = 0x20,
- CU_AD_FORMAT_NV12 = 0xb0,
- CU_AD_FORMAT_UNORM_INT8X1 = 0xc0,
- CU_AD_FORMAT_UNORM_INT8X2 = 0xc1,
- CU_AD_FORMAT_UNORM_INT8X4 = 0xc2,
- CU_AD_FORMAT_UNORM_INT16X1 = 0xc3,
- CU_AD_FORMAT_UNORM_INT16X2 = 0xc4,
- CU_AD_FORMAT_UNORM_INT16X4 = 0xc5,
- CU_AD_FORMAT_SNORM_INT8X1 = 0xc6,
- CU_AD_FORMAT_SNORM_INT8X2 = 0xc7,
- CU_AD_FORMAT_SNORM_INT8X4 = 0xc8,
- CU_AD_FORMAT_SNORM_INT16X1 = 0xc9,
- CU_AD_FORMAT_SNORM_INT16X2 = 0xca,
- CU_AD_FORMAT_SNORM_INT16X4 = 0xcb,
- CU_AD_FORMAT_BC1_UNORM = 0x91,
- CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92,
- CU_AD_FORMAT_BC2_UNORM = 0x93,
- CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94,
- CU_AD_FORMAT_BC3_UNORM = 0x95,
- CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96,
- CU_AD_FORMAT_BC4_UNORM = 0x97,
- CU_AD_FORMAT_BC4_SNORM = 0x98,
- CU_AD_FORMAT_BC5_UNORM = 0x99,
- CU_AD_FORMAT_BC5_SNORM = 0x9a,
- CU_AD_FORMAT_BC6H_UF16 = 0x9b,
- CU_AD_FORMAT_BC6H_SF16 = 0x9c,
- CU_AD_FORMAT_BC7_UNORM = 0x9d,
- CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e,
- CU_AD_FORMAT_P010 = 0x9f,
- CU_AD_FORMAT_P016 = 0xa1,
- CU_AD_FORMAT_NV16 = 0xa2,
- CU_AD_FORMAT_P210 = 0xa3,
- CU_AD_FORMAT_P216 = 0xa4,
- CU_AD_FORMAT_YUY2 = 0xa5,
- CU_AD_FORMAT_Y210 = 0xa6,
- CU_AD_FORMAT_Y216 = 0xa7,
- CU_AD_FORMAT_AYUV = 0xa8,
- CU_AD_FORMAT_Y410 = 0xa9,
- CU_AD_FORMAT_Y416 = 0xb1,
- CU_AD_FORMAT_Y444_PLANAR8 = 0xb2,
- CU_AD_FORMAT_Y444_PLANAR10 = 0xb3,
- CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4,
- CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5,
- CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50,
- } CUarray_format;
- * \endcode
- *
- * - \p NumChannels specifies the number of packed components per CUDA array
- * element; it may be 1, 2, or 4;
- *
- * - ::Flags may be set to
- * - ::CUDA_ARRAY3D_LAYERED to enable creation of layered CUDA arrays. If this flag is set,
- * \p Depth specifies the number of layers, not the depth of a 3D array.
- * - ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references to be bound to the CUDA array.
- * If this flag is not set, ::cuSurfRefSetArray will fail when attempting to bind the CUDA array
- * to a surface reference.
- * - ::CUDA_ARRAY3D_CUBEMAP to enable creation of cubemaps. If this flag is set, \p Width must be
- * equal to \p Height, and \p Depth must be six. If the ::CUDA_ARRAY3D_LAYERED flag is also set,
- * then \p Depth must be a multiple of six.
- * - ::CUDA_ARRAY3D_TEXTURE_GATHER to indicate that the CUDA array will be used for texture gather.
- * Texture gather can only be performed on 2D CUDA arrays.
- *
- * \p Width, \p Height and \p Depth must meet certain size requirements as listed in the following table.
- * All values are specified in elements. Note that for brevity's sake, the full name of the device attribute
- * is not specified. For example, TEXTURE1D_WIDTH refers to the device attribute
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH.
- *
- * Note that 2D CUDA arrays have different size requirements if the ::CUDA_ARRAY3D_TEXTURE_GATHER flag
- * is set. \p Width and \p Height must not be greater than ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
- * and ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in that case.
- *
- * <table>
- * <tr><td><b>CUDA array type</b></td>
- * <td><b>Valid extents that must always be met<br>{(width range in elements), (height range),
- * (depth range)}</b></td>
- * <td><b>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set<br>
- * {(width range in elements), (height range), (depth range)}</b></td></tr>
- * <tr><td>1D</td>
- * <td><small>{ (1,TEXTURE1D_WIDTH), 0, 0 }</small></td>
- * <td><small>{ (1,SURFACE1D_WIDTH), 0, 0 }</small></td></tr>
- * <tr><td>2D</td>
- * <td><small>{ (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 }</small></td>
- * <td><small>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</small></td></tr>
- * <tr><td>3D</td>
- * <td><small>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
- * <br>OR<br>{ (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
- * (1,TEXTURE3D_DEPTH_ALTERNATE) }</small></td>
- * <td><small>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT),
- * (1,SURFACE3D_DEPTH) }</small></td></tr>
- * <tr><td>1D Layered</td>
- * <td><small>{ (1,TEXTURE1D_LAYERED_WIDTH), 0,
- * (1,TEXTURE1D_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACE1D_LAYERED_WIDTH), 0,
- * (1,SURFACE1D_LAYERED_LAYERS) }</small></td></tr>
- * <tr><td>2D Layered</td>
- * <td><small>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
- * (1,TEXTURE2D_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT),
- * (1,SURFACE2D_LAYERED_LAYERS) }</small></td></tr>
- * <tr><td>Cubemap</td>
- * <td><small>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</small></td>
- * <td><small>{ (1,SURFACECUBEMAP_WIDTH),
- * (1,SURFACECUBEMAP_WIDTH), 6 }</small></td></tr>
- * <tr><td>Cubemap Layered</td>
- * <td><small>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
- * (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH),
- * (1,SURFACECUBEMAP_LAYERED_LAYERS) }</small></td></tr>
- * </table>
- *
- * Here are examples of CUDA array descriptions:
- *
- * Description for a CUDA array of 2048 floats:
- * \code
- CUDA_ARRAY3D_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_FLOAT;
- desc.NumChannels = 1;
- desc.Width = 2048;
- desc.Height = 0;
- desc.Depth = 0;
- * \endcode
- *
- * Description for a 64 x 64 CUDA array of floats:
- * \code
- CUDA_ARRAY3D_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_FLOAT;
- desc.NumChannels = 1;
- desc.Width = 64;
- desc.Height = 64;
- desc.Depth = 0;
- * \endcode
- *
- * Description for a \p width x \p height x \p depth CUDA array of 64-bit,
- * 4x16-bit float16's:
- * \code
- CUDA_ARRAY3D_DESCRIPTOR desc;
- desc.Format = CU_AD_FORMAT_HALF;
- desc.NumChannels = 4;
- desc.Width = width;
- desc.Height = height;
- desc.Depth = depth;
- * \endcode
- *
- * \param pHandle - Returned array
- * \param pAllocateArray - 3D array descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuArray3DGetDescriptor, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaMalloc3DArray
- */
- CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
- /**
- * \brief Get a 3D CUDA array descriptor
- *
- * Returns in \p *pArrayDescriptor a descriptor containing information on the
- * format and dimensions of the CUDA array \p hArray. It is useful for
- * subroutines that have been passed a CUDA array, but need to know the CUDA
- * array parameters for validation or other purposes.
- *
- * This function may be called on 1D and 2D arrays, in which case the \p Height
- * and/or \p Depth members of the descriptor struct will be set to 0.
- *
- * \param pArrayDescriptor - Returned 3D array descriptor
- * \param hArray - CUDA array to get descriptor of (1D and 2D arrays are also accepted)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- * \notefnerr
- *
- * \sa ::cuArray3DCreate, ::cuArrayCreate,
- * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
- * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
- * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
- * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
- * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
- * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
- * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
- * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
- * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32,
- * ::cudaArrayGetInfo
- */
- CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
- /**
- * \brief Creates a CUDA mipmapped array
- *
- * Creates a CUDA mipmapped array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure
- * \p pMipmappedArrayDesc and returns a handle to the new CUDA mipmapped array in \p *pHandle.
- * \p numMipmapLevels specifies the number of mipmap levels to be allocated. This value is
- * clamped to the range [1, 1 + floor(log2(max(\p Width, \p Height, \p Depth)))].
- *
- * The ::CUDA_ARRAY3D_DESCRIPTOR is defined as:
- *
- * \code
- typedef struct {
- unsigned int Width;
- unsigned int Height;
- unsigned int Depth;
- CUarray_format Format;
- unsigned int NumChannels;
- unsigned int Flags;
- } CUDA_ARRAY3D_DESCRIPTOR;
- * \endcode
- * where:
- *
- * - \p Width, \p Height, and \p Depth are the width, height, and depth of the
- * CUDA array (in elements); the following types of CUDA arrays can be allocated:
- * - A 1D mipmapped array is allocated if \p Height and \p Depth extents are both zero.
- * - A 2D mipmapped array is allocated if only \p Depth extent is zero.
- * - A 3D mipmapped array is allocated if all three extents are non-zero.
- * - A 1D layered CUDA mipmapped array is allocated if only \p Height is zero and the
- * ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number
- * of layers is determined by the depth extent.
- * - A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and
- * the ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number
- * of layers is determined by the depth extent.
- * - A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the
- * ::CUDA_ARRAY3D_CUBEMAP flag is set. \p Width must be equal to \p Height, and
- * \p Depth must be six. A cubemap is a special type of 2D layered CUDA array,
- * where the six layers represent the six faces of a cube. The order of the six
- * layers in memory is the same as that listed in ::CUarray_cubemap_face.
- * - A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero,
- * and both, ::CUDA_ARRAY3D_CUBEMAP and ::CUDA_ARRAY3D_LAYERED flags are set.
- * \p Width must be equal to \p Height, and \p Depth must be a multiple of six.
- * A cubemap layered CUDA array is a special type of 2D layered CUDA array that
- * consists of a collection of cubemaps. The first six layers represent the first
- * cubemap, the next six layers form the second cubemap, and so on.
- *
- * - \p Format specifies the format of the elements; ::CUarray_format is
- * defined as:
- * \code
- typedef enum CUarray_format_enum {
- CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
- CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
- CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
- CU_AD_FORMAT_SIGNED_INT8 = 0x08,
- CU_AD_FORMAT_SIGNED_INT16 = 0x09,
- CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
- CU_AD_FORMAT_HALF = 0x10,
- CU_AD_FORMAT_FLOAT = 0x20,
- CU_AD_FORMAT_NV12 = 0xb0,
- CU_AD_FORMAT_UNORM_INT8X1 = 0xc0,
- CU_AD_FORMAT_UNORM_INT8X2 = 0xc1,
- CU_AD_FORMAT_UNORM_INT8X4 = 0xc2,
- CU_AD_FORMAT_UNORM_INT16X1 = 0xc3,
- CU_AD_FORMAT_UNORM_INT16X2 = 0xc4,
- CU_AD_FORMAT_UNORM_INT16X4 = 0xc5,
- CU_AD_FORMAT_SNORM_INT8X1 = 0xc6,
- CU_AD_FORMAT_SNORM_INT8X2 = 0xc7,
- CU_AD_FORMAT_SNORM_INT8X4 = 0xc8,
- CU_AD_FORMAT_SNORM_INT16X1 = 0xc9,
- CU_AD_FORMAT_SNORM_INT16X2 = 0xca,
- CU_AD_FORMAT_SNORM_INT16X4 = 0xcb,
- CU_AD_FORMAT_BC1_UNORM = 0x91,
- CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92,
- CU_AD_FORMAT_BC2_UNORM = 0x93,
- CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94,
- CU_AD_FORMAT_BC3_UNORM = 0x95,
- CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96,
- CU_AD_FORMAT_BC4_UNORM = 0x97,
- CU_AD_FORMAT_BC4_SNORM = 0x98,
- CU_AD_FORMAT_BC5_UNORM = 0x99,
- CU_AD_FORMAT_BC5_SNORM = 0x9a,
- CU_AD_FORMAT_BC6H_UF16 = 0x9b,
- CU_AD_FORMAT_BC6H_SF16 = 0x9c,
- CU_AD_FORMAT_BC7_UNORM = 0x9d,
- CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e,
- CU_AD_FORMAT_P010 = 0x9f,
- CU_AD_FORMAT_P016 = 0xa1,
- CU_AD_FORMAT_NV16 = 0xa2,
- CU_AD_FORMAT_P210 = 0xa3,
- CU_AD_FORMAT_P216 = 0xa4,
- CU_AD_FORMAT_YUY2 = 0xa5,
- CU_AD_FORMAT_Y210 = 0xa6,
- CU_AD_FORMAT_Y216 = 0xa7,
- CU_AD_FORMAT_AYUV = 0xa8,
- CU_AD_FORMAT_Y410 = 0xa9,
- CU_AD_FORMAT_Y416 = 0xb1,
- CU_AD_FORMAT_Y444_PLANAR8 = 0xb2,
- CU_AD_FORMAT_Y444_PLANAR10 = 0xb3,
- CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4,
- CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5,
- CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50,
- } CUarray_format;
- * \endcode
- *
- * - \p NumChannels specifies the number of packed components per CUDA array
- * element; it may be 1, 2, or 4;
- *
- * - \p Flags may be set to
- * - ::CUDA_ARRAY3D_LAYERED to enable creation of layered CUDA mipmapped arrays. If this flag is set,
- * \p Depth specifies the number of layers, not the depth of a 3D array.
- * - ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references to be bound to individual mipmap levels of
- * the CUDA mipmapped array. If this flag is not set, ::cuSurfRefSetArray will fail when attempting to
- * bind a mipmap level of the CUDA mipmapped array to a surface reference.
- * - ::CUDA_ARRAY3D_CUBEMAP to enable creation of mipmapped cubemaps. If this flag is set, \p Width must be
- * equal to \p Height, and \p Depth must be six. If the ::CUDA_ARRAY3D_LAYERED flag is also set,
- * then \p Depth must be a multiple of six.
- * - ::CUDA_ARRAY3D_TEXTURE_GATHER to indicate that the CUDA mipmapped array will be used for texture gather.
- * Texture gather can only be performed on 2D CUDA mipmapped arrays.
- *
- * \p Width, \p Height and \p Depth must meet certain size requirements as listed in the following table.
- * All values are specified in elements. Note that for brevity's sake, the full name of the device attribute
- * is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH refers to the device attribute
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH.
- *
- * <table>
- * <tr><td><b>CUDA array type</b></td>
- * <td><b>Valid extents that must always be met<br>{(width range in elements), (height range),
- * (depth range)}</b></td>
- * <td><b>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set<br>
- * {(width range in elements), (height range), (depth range)}</b></td></tr>
- * <tr><td>1D</td>
- * <td><small>{ (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 }</small></td>
- * <td><small>{ (1,SURFACE1D_WIDTH), 0, 0 }</small></td></tr>
- * <tr><td>2D</td>
- * <td><small>{ (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 }</small></td>
- * <td><small>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</small></td></tr>
- * <tr><td>3D</td>
- * <td><small>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) }
- * <br>OR<br>{ (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE),
- * (1,TEXTURE3D_DEPTH_ALTERNATE) }</small></td>
- * <td><small>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT),
- * (1,SURFACE3D_DEPTH) }</small></td></tr>
- * <tr><td>1D Layered</td>
- * <td><small>{ (1,TEXTURE1D_LAYERED_WIDTH), 0,
- * (1,TEXTURE1D_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACE1D_LAYERED_WIDTH), 0,
- * (1,SURFACE1D_LAYERED_LAYERS) }</small></td></tr>
- * <tr><td>2D Layered</td>
- * <td><small>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT),
- * (1,TEXTURE2D_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT),
- * (1,SURFACE2D_LAYERED_LAYERS) }</small></td></tr>
- * <tr><td>Cubemap</td>
- * <td><small>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</small></td>
- * <td><small>{ (1,SURFACECUBEMAP_WIDTH),
- * (1,SURFACECUBEMAP_WIDTH), 6 }</small></td></tr>
- * <tr><td>Cubemap Layered</td>
- * <td><small>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH),
- * (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</small></td>
- * <td><small>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH),
- * (1,SURFACECUBEMAP_LAYERED_LAYERS) }</small></td></tr>
- * </table>
- *
- *
- * \param pHandle - Returned mipmapped array
- * \param pMipmappedArrayDesc - mipmapped array descriptor
- * \param numMipmapLevels - Number of mipmap levels to allocate (clamped to the range given above)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cuMipmappedArrayDestroy,
- * ::cuMipmappedArrayGetLevel,
- * ::cuArrayCreate,
- * ::cudaMallocMipmappedArray
- */
- CUresult CUDAAPI cuMipmappedArrayCreate(CUmipmappedArray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, unsigned int numMipmapLevels);
- /**
- * \brief Gets a mipmap level of a CUDA mipmapped array
- *
- * Returns in \p *pLevelArray a CUDA array that represents a single mipmap level
- * of the CUDA mipmapped array \p hMipmappedArray.
- *
- * If \p level is greater than the maximum number of levels in this mipmapped array,
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \param pLevelArray - Returned mipmap level CUDA array
- * \param hMipmappedArray - CUDA mipmapped array to query
- * \param level - Mipmap level to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::cuMipmappedArrayCreate,
- * ::cuMipmappedArrayDestroy,
- * ::cuArrayCreate,
- * ::cudaGetMipmappedArrayLevel
- */
- CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
- /**
- * \brief Destroys a CUDA mipmapped array
- *
- * Destroys the CUDA mipmapped array \p hMipmappedArray (see ::cuMipmappedArrayCreate).
- *
- * \param hMipmappedArray - Mipmapped array to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_ARRAY_IS_MAPPED,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- * \notefnerr
- *
- * \sa
- * ::cuMipmappedArrayCreate,
- * ::cuMipmappedArrayGetLevel,
- * ::cuArrayCreate,
- * ::cudaFreeMipmappedArray
- */
- CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
- /**
- * \brief Retrieve handle for an address range
- *
- * Get a handle of the specified type to an address range. The address range
- * must have been obtained by a prior call to either ::cuMemAlloc or ::cuMemAddressReserve.
- * If the address range was obtained via ::cuMemAddressReserve, it must also be fully mapped via ::cuMemMap.
- * On Tegra, the address range must have been obtained by a prior call to either
- * ::cuMemAllocHost or ::cuMemHostAlloc.
- *
- * Users must ensure the \p dptr and \p size are aligned to the host page size.
- *
- * When requesting CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD,
- * users are expected to query for dma_buf support for the platform
- * by using ::CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED device attribute before calling
- * this API. The \p handle will be interpreted as a pointer to an integer to store the dma_buf file descriptor.
- * Users must ensure the entire address range is backed and mapped when
- * the address range is allocated by ::cuMemAddressReserve. All the physical
- * allocations backing the address range must be resident on the same device and
- * have identical allocation properties. Users are also expected to retrieve a
- * new handle every time the underlying physical allocation(s) corresponding
- * to a previously queried VA range are changed.
- *
- * For CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, users may set
- * \p flags to ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE. When this flag is
- * set on a supported platform, the call will give a DMA_BUF handle mapped via
- * PCIE BAR1; otherwise it will return an error.
- *
- * \param[out] handle - Pointer to the location where the returned handle will be stored.
- * \param[in] dptr - Pointer to a valid CUDA device allocation. Must be aligned to host page size.
- * \param[in] size - Length of the address range. Must be aligned to host page size.
- * \param[in] handleType - Type of handle requested (defines type and size of the \p handle output parameter)
- * \param[in] flags - When requesting CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD the value could be
- * ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE, otherwise 0.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- */
- CUresult CUDAAPI cuMemGetHandleForAddressRange(void *handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags);
- /**
- * \brief Bitmasks for ::CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK.
- */
- typedef enum CUmemDecompressAlgorithm_enum {
- CU_MEM_DECOMPRESS_UNSUPPORTED = 0, /**< Decompression is unsupported. */
- CU_MEM_DECOMPRESS_ALGORITHM_DEFLATE = 1<<0, /**< Deflate is supported. */
- CU_MEM_DECOMPRESS_ALGORITHM_SNAPPY = 1<<1 /**< Snappy is supported. */
- } CUmemDecompressAlgorithm;
- /**
- * \brief Structure describing the parameters that compose a single
- * decompression operation.
- */
- typedef struct CUmemDecompressParams_st {
- /** The number of bytes to be read and decompressed from
- * ::CUmemDecompressParams_st.src. */
- size_t srcNumBytes;
- /** The number of bytes that the decompression operation will be expected to
- * write to ::CUmemDecompressParams_st.dst. This value is optional; if
- * present, it may be used by the CUDA driver as a heuristic for scheduling
- * the individual decompression operations. */
- size_t dstNumBytes;
- /** After the decompression operation has completed, the actual number of
- * bytes written to ::CUmemDecompressParams_st.dst will be recorded as a 32-bit
- * unsigned integer in the memory at this address. */
- cuuint32_t *dstActBytes;
- /** Pointer to a buffer of at least ::CUmemDecompressParams_st.srcNumBytes
- * compressed bytes. */
- const void *src;
- /** Pointer to a buffer where the decompressed data will be written. The
- * number of bytes written to this location will be recorded in the memory
- * pointed to by ::CUmemDecompressParams_st.dstActBytes. */
- void *dst;
- /** The decompression algorithm to use. */
- CUmemDecompressAlgorithm algo;
- /* These bytes are unused and must be zeroed. This ensures compatibility if
- * additional fields are added in the future. */
- unsigned char padding[20];
- } CUmemDecompressParams;
- /**
- * \brief Submit a batch of \p count independent decompression operations.
- *
- * \details Each of the \p count decompression operations is described by a
- * single entry in the \p paramsArray array. Once the batch has been
- * submitted, the function will return, and decompression will happen
- * asynchronously w.r.t. the CPU. To the work completion tracking
- * mechanisms in the CUDA driver, the batch will be considered a single
- * unit of work and processed according to stream semantics, i.e., it
- * is not possible to query the completion of individual decompression
- * operations within a batch.
- *
- * The memory pointed to by each of ::CUmemDecompressParams.src,
- * ::CUmemDecompressParams.dst, and ::CUmemDecompressParams.dstActBytes,
- * must be capable of usage with the hardware decompress feature. That
- * is, for each of said pointers, the pointer attribute
- * ::CU_POINTER_ATTRIBUTE_IS_MEM_DECOMPRESS_CAPABLE should give a
- * non-zero value. To ensure this, the memory backing the pointers
- * should have been allocated using one of the following CUDA memory
- * allocators:
- * * ::cuMemAlloc()
- * * ::cuMemCreate() with the usage flag ::CU_MEM_CREATE_USAGE_HW_DECOMPRESS
- * * ::cuMemAllocFromPoolAsync() from a pool that was created with
- * the usage flag ::CU_MEM_POOL_CREATE_USAGE_HW_DECOMPRESS
- * Additionally, ::CUmemDecompressParams.src, ::CUmemDecompressParams.dst,
- * and ::CUmemDecompressParams.dstActBytes, must all be accessible from
- * the device associated with the context where \p stream was created.
- * For information on how to ensure this, see the documentation for the
- * allocator of interest.
- *
- * \param[in] paramsArray The array of structures describing the independent
- * decompression operations.
- * \param[in] count The number of entries in \p paramsArray array.
- * \param[in] flags Must be 0.
- * \param[out] errorIndex The index into \p paramsArray of the decompression
- * operation to which the error returned by this
- * function pertains. If \p *errorIndex is SIZE_MAX and
- * the value returned is not ::CUDA_SUCCESS, then the
- * error returned by this function should be considered
- * a general error that does not pertain to a
- * particular decompression operation. May be \p NULL,
- * in which case, no index will be recorded in the
- * event of error.
- * \param[in] stream The stream where the work will be enqueued.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemAlloc, ::cuMemPoolCreate, ::cuMemAllocFromPoolAsync
- */
- CUresult CUDAAPI cuMemBatchDecompressAsync(
- CUmemDecompressParams *paramsArray,
- size_t count,
- unsigned int flags,
- size_t *errorIndex,
- CUstream stream
- );
- /** @} */ /* END CUDA_MEM */
- /**
- * \defgroup CUDA_VA Virtual Memory Management
- *
- * ___MANBRIEF___ virtual memory management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the virtual memory management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Allocate an address range reservation.
- *
- * Reserves a virtual address range based on the given parameters, giving
- * the starting address of the range in \p *ptr. This API requires a system that
- * supports UVA. The \p size and \p addr parameters must be a multiple of the
- * host page size and \p alignment must be a power of two or zero for default
- * alignment.
- *
- * \param[out] ptr - Resulting pointer to start of virtual address range allocated
- * \param[in] size - Size of the reserved virtual address range requested
- * \param[in] alignment - Alignment of the reserved virtual address range requested
- * \param[in] addr - Fixed starting address range requested
- * \param[in] flags - Currently unused, must be zero
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemAddressFree
- */
- CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags);
- /**
- * \brief Free an address range reservation.
- *
- * Frees a virtual address range reserved by ::cuMemAddressReserve. The \p size
- * must match what was given to ::cuMemAddressReserve and the \p ptr given must
- * match what was returned from ::cuMemAddressReserve.
- *
- * \param[in] ptr - Starting address of the virtual address range to free
- * \param[in] size - Size of the virtual address region to free
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemAddressReserve
- */
- CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size);
- /**
- * \brief Create a CUDA memory handle representing a memory allocation of a given size described by the given properties
- *
- * This creates a memory allocation on the target device specified through the
- * \p prop structure. The created allocation will not have any device or host
- * mappings. The generic memory \p handle for the allocation can be
- * mapped to the address space of calling process via ::cuMemMap. This handle
- * cannot be transmitted directly to other processes (see
- * ::cuMemExportToShareableHandle). On Windows, the caller must also pass
- * an LPSECURITY_ATTRIBUTES in \p prop to be associated with this handle which
- * limits or allows access to this handle for a recipient process (see
- * ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this
- * allocation must be a multiple of the value given via
- * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM
- * flag.
- * To create a CPU allocation targeting a specific host NUMA node, applications must
- * set ::CUmemAllocationProp::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and
- * ::CUmemAllocationProp::CUmemLocation::id must specify the NUMA ID of the CPU.
- * On systems where NUMA is not available ::CUmemAllocationProp::CUmemLocation::id must be set to 0.
- * Specifying ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT or ::CU_MEM_LOCATION_TYPE_HOST as the
- * ::CUmemLocation::type will result in ::CUDA_ERROR_INVALID_VALUE.
- *
- * Applications that intend to use ::CU_MEM_HANDLE_TYPE_FABRIC based memory sharing must ensure that:
- * (1) the `nvidia-caps-imex-channels` character device is created by the driver and is listed under /proc/devices
- * (2) at least one IMEX channel file is accessible by the user launching the application.
- *
- * When exporter and importer CUDA processes have been granted access to the same IMEX channel, they can securely
- * share memory.
- *
- * The IMEX channel security model works on a per user basis, which means all processes under a user can share
- * memory if the user has access to a valid IMEX channel. When multi-user isolation is desired, a separate IMEX
- * channel is required for each user.
- *
- * These channel files exist in /dev/nvidia-caps-imex-channels/channel* and can be created using standard OS
- * native calls like mknod on Linux. For example: To create channel0 with the major number from /proc/devices
- * users can execute the following command: `mknod /dev/nvidia-caps-imex-channels/channel0 c <major number> 0`
- *
- * If ::CUmemAllocationProp::allocFlags::usage contains ::CU_MEM_CREATE_USAGE_TILE_POOL flag then
- * the memory allocation is intended only to be used as backing tile pool for sparse CUDA arrays
- * and sparse CUDA mipmapped arrays.
- * (see ::cuMemMapArrayAsync).
- *
- * \param[out] handle - Value of handle returned. All operations on this allocation are to be performed using this handle.
- * \param[in] size - Size of the allocation requested
- * \param[in] prop - Properties of the allocation to create.
- * \param[in] flags - flags for future use, must be zero now.
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuMemRelease, ::cuMemExportToShareableHandle, ::cuMemImportFromShareableHandle
- */
- CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, const CUmemAllocationProp *prop, unsigned long long flags);
- /**
- * \brief Release a memory handle representing a memory allocation which was previously allocated through ::cuMemCreate.
- *
- * Frees the memory that was allocated on a device through ::cuMemCreate.
- *
- * The memory allocation will be freed when all outstanding mappings to the memory
- * are unmapped and when all outstanding references to the handle (including its
- * shareable counterparts) are also released. The generic memory handle can be
- * freed when there are still outstanding mappings made with this handle. Each
- * time a recipient process imports a shareable handle, it needs to pair it with
- * ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle
- * the behavior is undefined.
- *
- * \param[in] handle Value of handle which was returned previously by ::cuMemCreate.
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuMemCreate
- */
- CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle);
- /**
- * \brief Maps an allocation handle to a reserved virtual address range.
- *
- * Maps \p size bytes of the memory represented by \p handle, starting at byte
- * \p offset, to the address range [\p ptr, \p ptr + \p size]. This range must be an
- * address reservation previously reserved with ::cuMemAddressReserve, and
- * \p offset + \p size must be less than the size of the memory allocation.
- * \p ptr, \p size, and \p offset must each be a multiple of the value given via
- * ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM flag.
- * If \p handle represents a multicast object, \p ptr, \p size and \p offset must
- * be aligned to the value returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_MINIMUM_GRANULARITY. For best performance however, it is
- * recommended that \p ptr, \p size and \p offset be aligned to the value
- * returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_RECOMMENDED_GRANULARITY.
- *
- * Please note calling ::cuMemMap does not make the address accessible,
- * the caller needs to update accessibility of a contiguous mapped VA
- * range by calling ::cuMemSetAccess.
- *
- * Once a recipient process obtains a shareable memory handle
- * from ::cuMemImportFromShareableHandle, the process must
- * use ::cuMemMap to map the memory into its address ranges before
- * setting accessibility with ::cuMemSetAccess.
- *
- * ::cuMemMap can only create mappings on VA range reservations
- * that are not currently mapped.
- *
- * \param[in] ptr - Address where memory will be mapped.
- * \param[in] size - Size of the memory mapping.
- * \param[in] offset - Offset into the memory represented by
- * - \p handle from which to start mapping
- * - Note: currently must be zero.
- * \param[in] handle - Handle to a shareable memory
- * \param[in] flags - flags for future use, must be zero now.
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuMemUnmap, ::cuMemSetAccess, ::cuMemCreate, ::cuMemAddressReserve, ::cuMemImportFromShareableHandle
- */
- CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags);
- /**
- * \brief Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays
- *
- * Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays.
- * Each operation is specified by a ::CUarrayMapInfo entry in the \p mapInfoList array of size \p count.
- * The structure ::CUarrayMapInfo is defined as follows:
- \code
- typedef struct CUarrayMapInfo_st {
- CUresourcetype resourceType;
- union {
- CUmipmappedArray mipmap;
- CUarray array;
- } resource;
- CUarraySparseSubresourceType subresourceType;
- union {
- struct {
- unsigned int level;
- unsigned int layer;
- unsigned int offsetX;
- unsigned int offsetY;
- unsigned int offsetZ;
- unsigned int extentWidth;
- unsigned int extentHeight;
- unsigned int extentDepth;
- } sparseLevel;
- struct {
- unsigned int layer;
- unsigned long long offset;
- unsigned long long size;
- } miptail;
- } subresource;
- CUmemOperationType memOperationType;
-
- CUmemHandleType memHandleType;
- union {
- CUmemGenericAllocationHandle memHandle;
- } memHandle;
- unsigned long long offset;
- unsigned int deviceBitMask;
- unsigned int flags;
- unsigned int reserved[2];
- } CUarrayMapInfo;
- \endcode
- *
- * where ::CUarrayMapInfo::resourceType specifies the type of resource to be operated on.
- * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_ARRAY then
- * ::CUarrayMapInfo::resource::array must be set to a valid sparse CUDA array handle.
- * The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using
- * ::cuArrayCreate or ::cuArray3DCreate with the flag ::CUDA_ARRAY3D_SPARSE
- * or ::CUDA_ARRAY3D_DEFERRED_MAPPING.
- * For CUDA arrays obtained using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned.
- * If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
- * then ::CUarrayMapInfo::resource::mipmap must be set to a valid sparse CUDA mipmapped array handle.
- * The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been
- * allocated using ::cuMipmappedArrayCreate with the flag ::CUDA_ARRAY3D_SPARSE
- * or ::CUDA_ARRAY3D_DEFERRED_MAPPING.
- *
- * ::CUarrayMapInfo::subresourceType specifies the type of subresource within the resource.
- * ::CUarraySparseSubresourceType_enum is defined as:
- \code
- typedef enum CUarraySparseSubresourceType_enum {
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0,
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1
- } CUarraySparseSubresourceType;
- \endcode
- *
- * where ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL indicates a
- * sparse-miplevel which spans at least one tile in every dimension. The remaining miplevels which
- * are too small to span at least one tile in any dimension constitute the mip tail region as indicated by
- * ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL subresource type.
- *
- * If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL
- * then ::CUarrayMapInfo::subresource::sparseLevel struct must contain valid array subregion offsets and extents.
- * The ::CUarrayMapInfo::subresource::sparseLevel::offsetX, ::CUarrayMapInfo::subresource::sparseLevel::offsetY
- * and ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must specify valid X, Y and Z offsets respectively.
- * The ::CUarrayMapInfo::subresource::sparseLevel::extentWidth, ::CUarrayMapInfo::subresource::sparseLevel::extentHeight
- * and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth must specify valid width, height and depth extents respectively.
- * These offsets and extents must be aligned to the corresponding tile dimension.
- * For CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::level must specify a valid mip level index. Otherwise,
- * must be zero.
- * For layered CUDA arrays and layered CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::layer must specify a valid layer index. Otherwise,
- * must be zero.
- * ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must be zero and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth
- * must be set to 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays.
- * Tile extents can be obtained by calling ::cuArrayGetSparseProperties and ::cuMipmappedArrayGetSparseProperties.
- *
- * If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL
- * then ::CUarrayMapInfo::subresource::miptail struct must contain valid mip tail offset in
- * ::CUarrayMapInfo::subresource::miptail::offset and size in ::CUarrayMapInfo::subresource::miptail::size.
- * Both, mip tail offset and mip tail size must be aligned to the tile size.
- * For layered CUDA mipmapped arrays which don't have the flag ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL set in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags
- * as returned by ::cuMipmappedArrayGetSparseProperties, ::CUarrayMapInfo::subresource::miptail::layer must specify a valid layer index.
- * Otherwise, must be zero.
- *
- * If ::CUarrayMapInfo::resource::array or ::CUarrayMapInfo::resource::mipmap was created with ::CUDA_ARRAY3D_DEFERRED_MAPPING
- * flag set the ::CUarrayMapInfo::subresourceType and the contents of ::CUarrayMapInfo::subresource will be ignored.
- *
- * ::CUarrayMapInfo::memOperationType specifies the type of operation. ::CUmemOperationType is defined as:
- \code
- typedef enum CUmemOperationType_enum {
- CU_MEM_OPERATION_TYPE_MAP = 1,
- CU_MEM_OPERATION_TYPE_UNMAP = 2
- } CUmemOperationType;
- \endcode
- * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP then the subresource
- * will be mapped onto the tile pool memory specified by ::CUarrayMapInfo::memHandle at offset ::CUarrayMapInfo::offset.
- * The tile pool allocation has to be created by specifying the ::CU_MEM_CREATE_USAGE_TILE_POOL flag when calling ::cuMemCreate. Also,
- * ::CUarrayMapInfo::memHandleType must be set to ::CUmemHandleType::CU_MEM_HANDLE_TYPE_GENERIC.
- *
- * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_UNMAP then an unmapping operation
- * is performed. ::CUarrayMapInfo::memHandle must be NULL.
- *
- * ::CUarrayMapInfo::deviceBitMask specifies the list of devices that must map or unmap physical memory.
- * Currently, this mask must have exactly one bit set, and the corresponding device must match the device associated with the stream.
- * If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP, the device must also match
- * the device associated with the tile pool memory allocation as specified by ::CUarrayMapInfo::memHandle.
- *
- * ::CUarrayMapInfo::flags and ::CUarrayMapInfo::reserved[] are unused and must be set to zero.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- *
- * \param[in] mapInfoList - List of ::CUarrayMapInfo
- * \param[in] count - Count of ::CUarrayMapInfo in \p mapInfoList
- * \param[in] hStream - Stream identifier for the stream to use for map or unmap operations
- *
- * \sa ::cuMipmappedArrayCreate, ::cuArrayCreate, ::cuArray3DCreate, ::cuMemCreate, ::cuArrayGetSparseProperties, ::cuMipmappedArrayGetSparseProperties
- */
- CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, unsigned int count, CUstream hStream);
- /**
- * \brief Unmap the backing memory of a given address range.
- *
- * The range must be the entire contiguous address range that was mapped to. In
- * other words, ::cuMemUnmap cannot unmap a sub-range of an address range mapped
- * by ::cuMemCreate / ::cuMemMap. Any backing memory allocations will be freed
- * if there are no existing mappings and there are no unreleased memory handles.
- *
- * When ::cuMemUnmap returns successfully, the address range is converted to an
- * address reservation and can be used for future calls to ::cuMemMap. Any new
- * mapping to this virtual address will need to have access granted through
- * ::cuMemSetAccess, as all mappings start with no accessibility set up.
- *
- * \param[in] ptr - Starting address for the virtual address range to unmap
- * \param[in] size - Size of the virtual address range to unmap
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuMemCreate, ::cuMemAddressReserve, ::cuMemMap, ::cuMemSetAccess
- */
- CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size);
- /**
- * \brief Set the access flags for each location specified in \p desc for the given virtual address range
- *
- * Given the virtual address range via \p ptr and \p size, and the locations
- * in the array given by \p desc and \p count, set the access flags for the
- * target locations. The range must be a fully mapped address range
- * containing all allocations created by ::cuMemMap / ::cuMemCreate.
- * Users cannot specify ::CU_MEM_LOCATION_TYPE_HOST_NUMA accessibility for allocations created with other location types.
- * Note: When ::CUmemAccessDesc::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_HOST_NUMA, ::CUmemAccessDesc::CUmemLocation::id
- * is ignored.
- * When setting the access flags for a virtual address range mapping a multicast
- * object, \p ptr and \p size must be aligned to the value returned by
- * ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_MINIMUM_GRANULARITY.
- * For best performance however, it is recommended that \p ptr and \p size be
- * aligned to the value returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_RECOMMENDED_GRANULARITY.
- *
- * \param[in] ptr - Starting address for the virtual address range
- * \param[in] size - Length of the virtual address range
- * \param[in] desc - Array of ::CUmemAccessDesc that describe how to change the
- * - mapping for each location specified
- * \param[in] count - Number of ::CUmemAccessDesc in \p desc
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- * \note_sync
- *
- * \sa ::cuMemGetAccess, ::cuMemCreate, ::cuMemMap
- */
- CUresult CUDAAPI cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc *desc, size_t count);
- /**
- * \brief Get the access \p flags set for the given \p location and \p ptr
- *
- * \param[out] flags - Flags set for this location
- * \param[in] location - Location for which to check the flags
- * \param[in] ptr - Address for which to check the access flags
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemSetAccess
- */
- CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, const CUmemLocation *location, CUdeviceptr ptr);
- /**
- * \brief Exports an allocation to a requested shareable handle type
- *
- * Given a CUDA memory handle, create a shareable memory
- * allocation handle that can be used to share the memory with other
- * processes. The recipient process can convert the shareable handle back into a
- * CUDA memory handle using ::cuMemImportFromShareableHandle and map
- * it with ::cuMemMap. The implementation of what this handle is and how it
- * can be transferred is defined by the requested handle type in \p handleType.
- *
- * Once all shareable handles are closed and the allocation is released, the allocated
- * memory referenced will be released back to the OS and uses of the CUDA handle afterward
- * will lead to undefined behavior.
- *
- * This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL)
- * that support importing memory from the shareable type.
- *
- * \param[out] shareableHandle - Pointer to the location in which to store the requested handle type
- * \param[in] handle - CUDA handle for the memory allocation
- * \param[in] handleType - Type of shareable handle requested (defines type and size of the \p shareableHandle output parameter)
- * \param[in] flags - Reserved, must be zero
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemImportFromShareableHandle
- */
- CUresult CUDAAPI cuMemExportToShareableHandle(void *shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags);
- /**
- * \brief Imports an allocation from a requested shareable handle type.
- *
- * If the current process cannot support the memory described by this shareable
- * handle, this API will error as ::CUDA_ERROR_NOT_SUPPORTED.
- *
- * If \p shHandleType is ::CU_MEM_HANDLE_TYPE_FABRIC and the importer process has not been
- * granted access to the same IMEX channel as the exporter process, this API will error
- * as ::CUDA_ERROR_NOT_PERMITTED.
- *
- * \note Importing shareable handles exported from some graphics APIs (Vulkan, OpenGL, etc.)
- * created on devices under an SLI group may not be supported, and thus this API will
- * return ::CUDA_ERROR_NOT_SUPPORTED.
- * There is no guarantee that the contents of \p handle will be the same CUDA memory handle
- * for the same given OS shareable handle, or the same underlying allocation.
- *
- * \param[out] handle - CUDA Memory handle for the memory allocation.
- * \param[in] osHandle - Shareable Handle representing the memory allocation that is to be imported.
- * \param[in] shHandleType - handle type of the exported handle ::CUmemAllocationHandleType.
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemExportToShareableHandle, ::cuMemMap, ::cuMemRelease
- */
- CUresult CUDAAPI cuMemImportFromShareableHandle(CUmemGenericAllocationHandle *handle, void *osHandle, CUmemAllocationHandleType shHandleType);
- /**
- * \brief Calculates either the minimal or recommended granularity
- *
- * Calculates either the minimal or recommended granularity
- * for a given allocation specification and returns it in \p granularity. This
- * granularity can be used as a multiple for alignment, size, or address mapping.
- *
- * \param[out] granularity Returned granularity.
- * \param[in] prop Property for which to determine the granularity
- * \param[in] option Determines which granularity to return
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemCreate, ::cuMemMap
- */
- CUresult CUDAAPI cuMemGetAllocationGranularity(size_t *granularity, const CUmemAllocationProp *prop, CUmemAllocationGranularity_flags option);
- /**
- * \brief Retrieve the contents of the property structure defining properties for this handle
- *
- * \param[out] prop - Pointer to a properties structure which will hold the information about this handle
- * \param[in] handle - Handle on which to perform the query
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemCreate, ::cuMemImportFromShareableHandle
- */
- CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle);
- /**
- * \brief Given an address \p addr, returns the allocation handle of the backing memory allocation.
- *
- * The handle is guaranteed to be the same handle value used to map the memory. If the address
- * requested is not mapped, the function will fail. The returned handle must be released with
- * a corresponding number of calls to ::cuMemRelease.
- *
- * \note The address \p addr can be any address in a range previously mapped
- * by ::cuMemMap, and not necessarily the start address.
- *
- * \param[out] handle CUDA Memory handle for the backing memory allocation.
- * \param[in] addr Memory address to query, that has been mapped previously.
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMemCreate, ::cuMemRelease, ::cuMemMap
- */
- CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr);
- /** @} */ /* END CUDA_VA */
- /**
- * \defgroup CUDA_MALLOC_ASYNC Stream Ordered Memory Allocator
- *
- * ___MANBRIEF___ Functions for performing allocation and free operations in stream order.
- * Functions for controlling the behavior of the underlying allocator.
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the stream ordered memory allocator exposed by the
- * low-level CUDA driver application programming interface.
- *
- * @{
- *
- * \section CUDA_MALLOC_ASYNC_overview Overview
- *
- * The asynchronous allocator allows the user to allocate and free in stream order.
- * All asynchronous accesses of the allocation must happen between
- * the stream executions of the allocation and the free. If the memory is accessed
- * outside of the promised stream order, a use before allocation / use after free error
- * will cause undefined behavior.
- *
- * The allocator is free to reallocate the memory as long as it can guarantee
- * that compliant memory accesses will not overlap temporally.
- * The allocator may refer to internal stream ordering as well as inter-stream dependencies
- * (such as CUDA events and null stream dependencies) when establishing the temporal guarantee.
- * The allocator may also insert inter-stream dependencies to establish the temporal guarantee.
- *
- * \section CUDA_MALLOC_ASYNC_support Supported Platforms
- *
- * Whether or not a device supports the integrated stream ordered memory allocator
- * may be queried by calling ::cuDeviceGetAttribute() with the device attribute
- * ::CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED
- */
- /**
- * \brief Frees memory with stream ordered semantics
- *
- * Inserts a free operation into \p hStream.
- * The allocation must not be accessed after stream execution reaches the free.
- * After this API returns, accessing the memory from any subsequent work launched on the GPU
- * or querying its pointer attributes results in undefined behavior.
- *
- * \note During stream capture, this function results in the creation of a free node and
- * must therefore be passed the address of a graph allocation.
- *
- * \param[in] dptr - memory to free
- * \param[in] hStream - The stream establishing the stream ordering contract.
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context),
- * ::CUDA_ERROR_NOT_SUPPORTED
- */
- CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
- /**
- * \brief Allocates memory with stream ordered semantics
- *
- * Inserts an allocation operation into \p hStream.
- * A pointer to the allocated memory is returned immediately in *dptr.
- * The allocation must not be accessed until the allocation operation completes.
- * The allocation comes from the memory pool current to the stream's device.
- *
- * \note The default memory pool of a device contains device memory from that device.
- * \note Basic stream ordering allows future work submitted into the same stream to use the allocation.
- * Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation
- * operation completes before work submitted in a separate stream runs.
- * \note During stream capture, this function results in the creation of an allocation node. In this case,
- * the allocation is owned by the graph instead of the memory pool. The memory pool's properties
- * are used to set the node's creation parameters.
- *
- * \param[out] dptr - Returned device pointer
- * \param[in] bytesize - Number of bytes to allocate
- * \param[in] hStream - The stream establishing the stream ordering contract and the memory pool to allocate from
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context),
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuMemAllocFromPoolAsync, ::cuMemFreeAsync, ::cuDeviceSetMemPool,
- * ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate,
- * ::cuMemPoolSetAccess, ::cuMemPoolSetAttribute
- */
- CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream);
- /**
- * \brief Tries to release memory back to the OS
- *
- * Releases memory back to the OS until the pool contains fewer than \p minBytesToKeep
- * reserved bytes, or there is no more memory that the allocator can safely release.
- * The allocator cannot release OS allocations that back outstanding asynchronous allocations.
- * The OS allocations may happen at different granularity from the user allocations.
- *
- * \note Allocations that have not been freed count as outstanding.
- * \note Allocations that have been asynchronously freed but whose completion has
- * not been observed on the host (e.g. by a synchronize) can count as outstanding.
- *
- * \param[in] pool - The memory pool to trim
- * \param[in] minBytesToKeep - If the pool has less than \p minBytesToKeep reserved,
- * the TrimTo operation is a no-op. Otherwise the pool will be guaranteed to have
- * at least \p minBytesToKeep bytes reserved after the operation.
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep);
- /**
- * \brief Sets attributes of a memory pool
- *
- * Supported attributes are:
- * - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t)
- * Amount of reserved memory in bytes to hold onto before trying
- * to release memory back to the OS. When more than the release
- * threshold bytes of memory are held by the memory pool, the
- * allocator will try to release memory back to the OS on the
- * next call to stream, event or context synchronize. (default 0)
- * - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int)
- * Allow ::cuMemAllocAsync to use memory asynchronously freed
- * in another stream as long as a stream ordering dependency
- * of the allocating stream on the free action exists.
- * CUDA events and null stream interactions can create the required
- * stream ordered dependencies. (default enabled)
- * - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int)
- * Allow reuse of already completed frees when there is no dependency
- * between the free and allocation. (default enabled)
- * - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int)
- * Allow ::cuMemAllocAsync to insert new stream dependencies
- * in order to establish the stream ordering required to reuse
- * a piece of memory released by ::cuMemFreeAsync (default enabled).
- * - ::CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: (value type = cuuint64_t)
- * Reset the high watermark that tracks the amount of backing memory that was
- * allocated for the memory pool. It is illegal to set this attribute to a non-zero value.
- * - ::CU_MEMPOOL_ATTR_USED_MEM_HIGH: (value type = cuuint64_t)
- * Reset the high watermark that tracks the amount of used memory that was
- * allocated for the memory pool.
- *
- * \param[in] pool - The memory pool to modify
- * \param[in] attr - The attribute to modify
- * \param[in] value - Pointer to the value to assign
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
- /**
- * \brief Gets attributes of a memory pool
- *
- * Supported attributes are:
- * - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t)
- * Amount of reserved memory in bytes to hold onto before trying
- * to release memory back to the OS. When more than the release
- * threshold bytes of memory are held by the memory pool, the
- * allocator will try to release memory back to the OS on the
- * next call to stream, event or context synchronize. (default 0)
- * - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int)
- * Allow ::cuMemAllocAsync to use memory asynchronously freed
- * in another stream as long as a stream ordering dependency
- * of the allocating stream on the free action exists.
- * CUDA events and null stream interactions can create the required
- * stream ordered dependencies. (default enabled)
- * - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int)
- * Allow reuse of already completed frees when there is no dependency
- * between the free and allocation. (default enabled)
- * - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int)
- * Allow ::cuMemAllocAsync to insert new stream dependencies
- * in order to establish the stream ordering required to reuse
- * a piece of memory released by ::cuMemFreeAsync (default enabled).
- * - ::CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT: (value type = cuuint64_t)
- * Amount of backing memory currently allocated for the mempool.
- * - ::CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: (value type = cuuint64_t)
- * High watermark of backing memory allocated for the mempool since the
- * last time it was reset.
- * - ::CU_MEMPOOL_ATTR_USED_MEM_CURRENT: (value type = cuuint64_t)
- * Amount of memory from the pool that is currently in use by the application.
- * - ::CU_MEMPOOL_ATTR_USED_MEM_HIGH: (value type = cuuint64_t)
- * High watermark of the amount of memory from the pool that was in use by the application.
- *
- * \param[in] pool - The memory pool to get attributes of
- * \param[in] attr - The attribute to get
- * \param[out] value - Retrieved value
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
- /**
- * \brief Controls visibility of pools between devices
- *
- * \param[in] pool - The pool being modified
- * \param[in] map - Array of access descriptors. Each descriptor instructs the access to enable for a single GPU.
- * \param[in] count - Number of descriptors in the \p map array.
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc *map, size_t count);
- /**
- * \brief Returns the accessibility of a pool from a device
- *
- * Returns the accessibility of the pool's memory from the specified location.
- *
- * \param[out] flags - the accessibility of the pool from the specified location
- * \param[in] memPool - the pool being queried
- * \param[in] location - the location accessing the pool
- *
- * \returns
- * A ::CUresult status code.
- * NOTE(review): unlike the neighbouring memory pool functions, this comment does
- * not enumerate the possible status values; confirm them and list them here.
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, CUmemLocation *location);
- /**
- * \brief Creates a memory pool
- *
- * Creates a CUDA memory pool and returns the handle in \p pool. The \p poolProps determines
- * the properties of the pool such as the backing device and IPC capabilities.
- *
- * To create a memory pool targeting a specific host NUMA node, applications must
- * set ::CUmemPoolProps::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and
- * ::CUmemPoolProps::CUmemLocation::id must specify the NUMA ID of the host memory node.
- * Specifying ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT or ::CU_MEM_LOCATION_TYPE_HOST as the
- * ::CUmemPoolProps::CUmemLocation::type will result in ::CUDA_ERROR_INVALID_VALUE.
- * By default, the pool's memory will be accessible from the device it is allocated on.
- * In the case of pools created with ::CU_MEM_LOCATION_TYPE_HOST_NUMA, their default accessibility
- * will be from the host CPU.
- * Applications can control the maximum size of the pool by specifying a non-zero value for ::CUmemPoolProps::maxSize.
- * If set to 0, the maximum size of the pool will default to a system dependent value.
- *
- * Applications that intend to use ::CU_MEM_HANDLE_TYPE_FABRIC based memory sharing must ensure that:
- * (1) the `nvidia-caps-imex-channels` character device is created by the driver and is listed under /proc/devices, and
- * (2) at least one IMEX channel file is accessible by the user launching the application.
- *
- * When exporter and importer CUDA processes have been granted access to the same IMEX channel, they can securely
- * share memory.
- *
- * The IMEX channel security model works on a per user basis, which means all processes under a user can share
- * memory if the user has access to a valid IMEX channel. When multi-user isolation is desired, a separate IMEX
- * channel is required for each user.
- *
- * These channel files exist in /dev/nvidia-caps-imex-channels/channel* and can be created using standard OS
- * native calls like mknod on Linux. For example, to create channel0 with the major number from /proc/devices,
- * users can execute the following command: `mknod /dev/nvidia-caps-imex-channels/channel0 c <major number> 0`
- *
- * \note Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC.
- *
- * \param[out] pool - Returned handle of the created memory pool
- * \param[in] poolProps - Properties of the memory pool to create
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuDeviceSetMemPool, ::cuDeviceGetMemPool, ::cuDeviceGetDefaultMemPool,
- * ::cuMemAllocFromPoolAsync, ::cuMemPoolExportToShareableHandle
- */
- CUresult CUDAAPI cuMemPoolCreate(CUmemoryPool *pool, const CUmemPoolProps *poolProps);
- /**
- * \brief Destroys the specified memory pool
- *
- * If any pointers obtained from this pool haven't been freed or
- * the pool has free operations that haven't completed
- * when ::cuMemPoolDestroy is invoked, the function will return immediately and the
- * resources associated with the pool will be released automatically
- * once there are no more outstanding allocations.
- *
- * Destroying the current mempool of a device sets the default mempool of
- * that device as the current mempool for that device.
- *
- * \note A device's default memory pool cannot be destroyed.
- *
- * \param[in] pool - The memory pool to destroy
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuMemFreeAsync, ::cuDeviceSetMemPool, ::cuDeviceGetMemPool,
- * ::cuDeviceGetDefaultMemPool, ::cuMemPoolCreate
- */
- CUresult CUDAAPI cuMemPoolDestroy(CUmemoryPool pool);
- /**
- * \brief Allocates memory from a specified pool with stream ordered semantics.
- *
- * Inserts an allocation operation into \p hStream.
- * A pointer to the allocated memory is returned immediately in *dptr.
- * The allocation must not be accessed until the allocation operation completes.
- * The allocation comes from the specified memory pool.
- *
- * \note
- * - The specified memory pool may be from a device different than that of the specified \p hStream.
- *
- * - Basic stream ordering allows future work submitted into the same stream to use the allocation.
- * Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation
- * operation completes before work submitted in a separate stream runs.
- *
- * \note During stream capture, this function results in the creation of an allocation node. In this case,
- * the allocation is owned by the graph instead of the memory pool. The memory pool's properties
- * are used to set the node's creation parameters.
- *
- * \param[out] dptr - Returned device pointer
- * \param[in] bytesize - Number of bytes to allocate
- * \param[in] pool - The pool to allocate from
- * \param[in] hStream - The stream establishing the stream ordering semantic
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context),
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool,
- * ::cuDeviceGetMemPool, ::cuMemPoolCreate, ::cuMemPoolSetAccess,
- * ::cuMemPoolSetAttribute
- */
- CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
- /**
- * \brief Exports a memory pool to the requested handle type.
- *
- * Given an IPC capable mempool, create an OS handle to share the pool with another process.
- * A recipient process can convert the shareable handle into a mempool with ::cuMemPoolImportFromShareableHandle.
- * Individual pointers can then be shared with the ::cuMemPoolExportPointer and ::cuMemPoolImportPointer APIs.
- * The implementation of what the shareable handle is and how it can be transferred is defined by the requested
- * handle type.
- *
- * \note: To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE.
- *
- * \param[out] handle_out - Returned OS handle
- * \param[in] pool - pool to export
- * \param[in] handleType - the type of handle to create
- * \param[in] flags - must be 0
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuMemPoolImportFromShareableHandle, ::cuMemPoolExportPointer,
- * ::cuMemPoolImportPointer, ::cuMemAllocAsync, ::cuMemFreeAsync,
- * ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate,
- * ::cuMemPoolSetAccess, ::cuMemPoolSetAttribute
- */
- CUresult CUDAAPI cuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags);
- /**
- * \brief Imports a memory pool from a shared handle.
- *
- * Specific allocations can be imported from the imported pool with ::cuMemPoolImportPointer.
- *
- * If \p handleType is ::CU_MEM_HANDLE_TYPE_FABRIC and the importer process has not been
- * granted access to the same IMEX channel as the exporter process, this API will error
- * as ::CUDA_ERROR_NOT_PERMITTED.
- *
- *
- * \note Imported memory pools do not support creating new allocations.
- * As such imported memory pools may not be used in ::cuDeviceSetMemPool
- * or ::cuMemAllocFromPoolAsync calls.
- *
- * \param[out] pool_out - Returned memory pool
- * \param[in] handle - OS handle of the pool to open
- * \param[in] handleType - The type of handle being imported
- * \param[in] flags - must be 0
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_NOT_PERMITTED
- *
- * \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolExportPointer, ::cuMemPoolImportPointer
- */
- CUresult CUDAAPI cuMemPoolImportFromShareableHandle(
- CUmemoryPool *pool_out,
- void *handle,
- CUmemAllocationHandleType handleType,
- unsigned long long flags);
- /**
- * \brief Export data to share a memory pool allocation between processes.
- *
- * Constructs \p shareData_out for sharing a specific allocation from an already shared memory pool.
- * The recipient process can import the allocation with the ::cuMemPoolImportPointer api.
- * The data is not a handle and may be shared through any IPC mechanism.
- *
- * \param[out] shareData_out - Returned export data
- * \param[in] ptr - pointer to memory being exported
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolImportFromShareableHandle, ::cuMemPoolImportPointer
- */
- CUresult CUDAAPI cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, CUdeviceptr ptr);
- /**
- * \brief Import a memory pool allocation from another process.
- *
- * Returns in \p ptr_out a pointer to the imported memory.
- * The imported memory must not be accessed before the allocation operation completes
- * in the exporting process. The imported memory must be freed from all importing processes before
- * being freed in the exporting process. The pointer may be freed with cuMemFree
- * or cuMemFreeAsync. If cuMemFreeAsync is used, the free must be completed
- * on the importing process before the free operation on the exporting process.
- *
- * \note The cuMemFreeAsync api may be used in the exporting process before
- * the cuMemFreeAsync operation completes in its stream as long as the
- * cuMemFreeAsync in the exporting process specifies a stream with
- * a stream dependency on the importing process's cuMemFreeAsync.
- *
- * \param[out] ptr_out - pointer to imported memory
- * \param[in] pool - pool from which to import
- * \param[in] shareData - data specifying the memory to import
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolImportFromShareableHandle, ::cuMemPoolExportPointer
- */
- CUresult CUDAAPI cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData *shareData);
- /** @} */ /* END CUDA_MALLOC_ASYNC */
- /**
- * \defgroup CUDA_MULTICAST Multicast Object Management
- *
- * ___MANBRIEF___ Functions for creating multicast objects, adding devices to them and binding/unbinding memory
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the CUDA multicast object operations exposed by the
- * low-level CUDA driver application programming interface.
- *
- * @{
- *
- * \section CUDA_MULTICAST_overview Overview
- *
- * A multicast object created via ::cuMulticastCreate enables certain memory
- * operations to be broadcast to a team of devices. Devices can be added to a
- * multicast object via ::cuMulticastAddDevice. Memory can be bound on each
- * participating device via either ::cuMulticastBindMem or ::cuMulticastBindAddr.
- * Multicast objects can be mapped into a device's virtual address space using
- * the virtual memory management APIs (see ::cuMemMap and ::cuMemSetAccess).
- *
- * \section CUDA_MULTICAST_support Supported Platforms
- *
- * Support for multicast on a specific device can be queried using the device
- * attribute ::CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED
- */
- /**
- * \brief Create a generic allocation handle representing a multicast object described by the given properties.
- *
- * This creates a multicast object as described by \p prop. The number of
- * participating devices is specified by ::CUmulticastObjectProp::numDevices.
- * Devices can be added to the multicast object via ::cuMulticastAddDevice.
- * All participating devices must be added to the multicast object before memory
- * can be bound to it. Memory is bound to the multicast object via either
- * ::cuMulticastBindMem or ::cuMulticastBindAddr, and can be unbound via
- * ::cuMulticastUnbind. The total amount of memory that can be bound per device
- * is specified by ::CUmulticastObjectProp::size. This size must be a multiple of
- * the value returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_GRANULARITY_MINIMUM. For best performance however, the size
- * should be aligned to the value returned by ::cuMulticastGetGranularity with
- * the flag ::CU_MULTICAST_GRANULARITY_RECOMMENDED.
- *
- * After all participating devices have been added, multicast objects can also
- * be mapped to a device's virtual address space using the virtual memory
- * management APIs (see ::cuMemMap and ::cuMemSetAccess). Multicast objects can
- * also be shared with other processes by requesting a shareable handle via
- * ::cuMemExportToShareableHandle. Note that the desired types of shareable
- * handles must be specified in the bitmask ::CUmulticastObjectProp::handleTypes.
- * Multicast objects can be released using the virtual memory management API
- * ::cuMemRelease.
- *
- * \param[out] mcHandle Value of handle returned.
- * \param[in] prop Properties of the multicast object to create.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMulticastAddDevice, ::cuMulticastBindMem, ::cuMulticastBindAddr, ::cuMulticastUnbind
- * \sa ::cuMemCreate, ::cuMemRelease, ::cuMemExportToShareableHandle, ::cuMemImportFromShareableHandle
- */
- CUresult CUDAAPI cuMulticastCreate(CUmemGenericAllocationHandle *mcHandle, const CUmulticastObjectProp *prop);
- /**
- * \brief Associate a device to a multicast object.
- *
- * Associates a device to a multicast object. The added device will be a part of
- * the multicast team of size specified by CUmulticastObjectProp::numDevices
- * during ::cuMulticastCreate.
- * The association of the device to the multicast object is permanent during
- * the life time of the multicast object.
- * All devices must be added to the multicast team before any memory can be
- * bound to any device in the team. Any calls to ::cuMulticastBindMem or
- * ::cuMulticastBindAddr will block until all devices have been added.
- * Similarly all devices must be added to the multicast team before a virtual
- * address range can be mapped to the multicast object. A call to ::cuMemMap
- * will block until all devices have been added.
- *
- * \param[in] mcHandle Handle representing a multicast object.
- * \param[in] dev Device that will be associated to the multicast
- * object.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMulticastCreate, ::cuMulticastBindMem, ::cuMulticastBindAddr
- */
- CUresult CUDAAPI cuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev);
- /**
- * \brief Bind a memory allocation represented by a handle to a multicast object.
- *
- * Binds a memory allocation specified by \p memHandle and created via
- * ::cuMemCreate to a multicast object represented by \p mcHandle and created
- * via ::cuMulticastCreate. The intended \p size of the bind, the offset in the
- * multicast range \p mcOffset as well as the offset in the memory \p memOffset
- * must be a multiple of the value returned by ::cuMulticastGetGranularity with
- * the flag ::CU_MULTICAST_GRANULARITY_MINIMUM. For best performance however,
- * \p size, \p mcOffset and \p memOffset should be aligned to the granularity of
- * the memory allocation(see ::cuMemGetAllocationGranularity) or to the value
- * returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_GRANULARITY_RECOMMENDED.
- *
- * The \p size + \p memOffset cannot be larger than the size of the allocated
- * memory. Similarly the \p size + \p mcOffset cannot be larger than the size
- * of the multicast object.
- * The memory allocation must have been created on one of the devices
- * that was added to the multicast team via ::cuMulticastAddDevice.
- * Externally shareable as well as imported multicast objects can be bound only
- * to externally shareable memory.
- * Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if there are
- * insufficient resources required to perform the bind. This call may also
- * return CUDA_ERROR_SYSTEM_NOT_READY if the necessary system software is not
- * initialized or running.
- *
- * \param[in] mcHandle Handle representing a multicast object.
- * \param[in] mcOffset Offset into the multicast object for attachment.
- * \param[in] memHandle Handle representing a memory allocation.
- * \param[in] memOffset Offset into the memory for attachment.
- * \param[in] size Size of the memory that will be bound to the
- * multicast object.
- * \param[in] flags Flags for future use, must be zero for now.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_SYSTEM_NOT_READY
- *
- * \sa ::cuMulticastCreate, ::cuMulticastAddDevice, ::cuMemCreate
- */
- CUresult CUDAAPI cuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags);
- /**
- * \brief Bind a memory allocation represented by a virtual address to a multicast object.
- *
- * Binds a memory allocation specified by its mapped address \p memptr to a
- * multicast object represented by \p mcHandle.
- * The memory must have been allocated via ::cuMemCreate or ::cudaMallocAsync.
- * The intended \p size of the bind, the offset in the multicast range
- * \p mcOffset and \p memptr must be a multiple of the value returned by
- * ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_GRANULARITY_MINIMUM.
- * For best performance however, \p size, \p mcOffset and \p memptr should be
- * aligned to the value returned by ::cuMulticastGetGranularity with the flag
- * ::CU_MULTICAST_GRANULARITY_RECOMMENDED.
- *
- * The \p size cannot be larger than the size of the allocated memory.
- * Similarly the \p size + \p mcOffset cannot be larger than the total size
- * of the multicast object.
- * The memory allocation must have been created on one of the devices
- * that was added to the multicast team via ::cuMulticastAddDevice.
- * Externally shareable as well as imported multicast objects can be bound only
- * to externally shareable memory.
- * Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if there are
- * insufficient resources required to perform the bind. This call may also
- * return CUDA_ERROR_SYSTEM_NOT_READY if the necessary system software is not
- * initialized or running.
- *
- * \param[in] mcHandle Handle representing a multicast object.
- * \param[in] mcOffset Offset into multicast va range for attachment.
- * \param[in] memptr Virtual address of the memory allocation.
- * \param[in] size Size of memory that will be bound to the
- * multicast object.
- * \param[in] flags Flags for future use, must be zero for now.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY,
- * ::CUDA_ERROR_SYSTEM_NOT_READY
- *
- * \sa ::cuMulticastCreate, ::cuMulticastAddDevice, ::cuMemCreate
- */
- CUresult CUDAAPI cuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags);
- /**
- * \brief Unbind any memory allocations bound to a multicast object at a given offset and up to a given size.
- *
- * Unbinds any memory allocations hosted on \p dev and bound to a multicast
- * object at \p mcOffset and up to a given \p size.
- * The intended \p size of the unbind and the offset in the multicast range
- * ( \p mcOffset ) must be a multiple of the value returned by
- * ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_GRANULARITY_MINIMUM.
- * The \p size + \p mcOffset cannot be larger than the total size of the
- * multicast object.
- *
- * \note
- * Warning:
- * The \p mcOffset and the \p size must match the corresponding values specified
- * during the bind call. Any other values may result in undefined behavior.
- *
- * \param[in] mcHandle Handle representing a multicast object.
- * \param[in] dev Device that hosts the memory allocation.
- * \param[in] mcOffset Offset into the multicast object.
- * \param[in] size Desired size to unbind.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMulticastBindMem, ::cuMulticastBindAddr
- */
- CUresult CUDAAPI cuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size);
- /**
- * \brief Calculates either the minimal or recommended granularity for a multicast object
- *
- * Calculates either the minimal or recommended granularity for a given set of
- * multicast object properties and returns it in \p granularity. This granularity
- * can be used as a multiple for size, bind offsets and address mappings of the
- * multicast object.
- *
- * \param[out] granularity Returned granularity.
- * \param[in] prop Properties of the multicast object.
- * \param[in] option Determines which granularity to return.
- *
- * \returns
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa ::cuMulticastCreate, ::cuMulticastBindMem, ::cuMulticastBindAddr, ::cuMulticastUnbind
- */
- CUresult CUDAAPI cuMulticastGetGranularity(size_t *granularity, const CUmulticastObjectProp *prop, CUmulticastGranularity_flags option);
- /** @} */ /* END CUDA_MULTICAST */
- /**
- * \defgroup CUDA_UNIFIED Unified Addressing
- *
- * ___MANBRIEF___ unified addressing functions of the low-level CUDA driver
- * API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the unified addressing functions of the
- * low-level CUDA driver application programming interface.
- *
- * @{
- *
- * \section CUDA_UNIFIED_overview Overview
- *
- * CUDA devices can share a unified address space with the host.
- * For these devices there is no distinction between a device
- * pointer and a host pointer -- the same pointer value may be
- * used to access memory from the host program and from a kernel
- * running on the device (with exceptions enumerated below).
- *
- * \section CUDA_UNIFIED_support Supported Platforms
- *
- * Whether or not a device supports unified addressing may be
- * queried by calling ::cuDeviceGetAttribute() with the device
- * attribute ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.
- *
- * Unified addressing is automatically enabled in 64-bit processes.
- *
- * \section CUDA_UNIFIED_lookup Looking Up Information from Pointer Values
- *
- * It is possible to look up information about the memory which backs a
- * pointer value. For instance, one may want to know if a pointer points
- * to host or device memory. As another example, in the case of device
- * memory, one may want to know on which CUDA device the memory
- * resides. These properties may be queried using the function
- * ::cuPointerGetAttribute().
- *
- * Since pointers are unique, it is not necessary to specify information
- * about the pointers specified to the various copy functions in the
- * CUDA API. The function ::cuMemcpy() may be used to perform a copy
- * between two pointers, ignoring whether they point to host or device
- * memory (making ::cuMemcpyHtoD(), ::cuMemcpyDtoD(), and ::cuMemcpyDtoH()
- * unnecessary for devices supporting unified addressing). For
- * multidimensional copies, the memory type ::CU_MEMORYTYPE_UNIFIED may be
- * used to specify that the CUDA driver should infer the location of the
- * pointer from its value.
- *
- * \section CUDA_UNIFIED_automaphost Automatic Mapping of Host Allocated Host Memory
- *
- * All host memory allocated in all contexts using ::cuMemAllocHost() and
- * ::cuMemHostAlloc() is always directly accessible from all contexts on
- * all devices that support unified addressing. This is the case regardless
- * of whether or not the flags ::CU_MEMHOSTALLOC_PORTABLE and
- * ::CU_MEMHOSTALLOC_DEVICEMAP are specified.
- *
- * The pointer value through which allocated host memory may be accessed
- * in kernels on all devices that support unified addressing is the same
- * as the pointer value through which that memory is accessed on the host,
- * so it is not necessary to call ::cuMemHostGetDevicePointer() to get the device
- * pointer for these allocations.
- *
- * Note that this is not the case for memory allocated using the flag
- * ::CU_MEMHOSTALLOC_WRITECOMBINED, as discussed below.
- *
- * \section CUDA_UNIFIED_autopeerregister Automatic Registration of Peer Memory
- *
- * Upon enabling direct access from a context that supports unified addressing
- * to another peer context that supports unified addressing using
- * ::cuCtxEnablePeerAccess() all memory allocated in the peer context using
- * ::cuMemAlloc() and ::cuMemAllocPitch() will immediately be accessible
- * by the current context. The device pointer value through
- * which any peer memory may be accessed in the current context
- * is the same pointer value through which that memory may be
- * accessed in the peer context.
- *
- * \section CUDA_UNIFIED_exceptions Exceptions, Disjoint Addressing
- *
- * Not all memory may be accessed on devices through the same pointer
- * value through which they are accessed on the host. These exceptions
- * are host memory registered using ::cuMemHostRegister() and host memory
- * allocated using the flag ::CU_MEMHOSTALLOC_WRITECOMBINED. For these
- * exceptions, there exists a distinct host and device address for the
- * memory. The device address is guaranteed to not overlap any valid host
- * pointer range and is guaranteed to have the same value across all
- * contexts that support unified addressing.
- *
- * This device address may be queried using ::cuMemHostGetDevicePointer()
- * when a context using unified addressing is current. Either the host
- * or the unified device pointer value may be used to refer to this memory
- * through ::cuMemcpy() and similar functions using the
- * ::CU_MEMORYTYPE_UNIFIED memory type.
- *
- */
- /**
- * \brief Returns information about a pointer
- *
- * The supported attributes are:
- *
- * - ::CU_POINTER_ATTRIBUTE_CONTEXT:
- *
- * Returns in \p *data the ::CUcontext in which \p ptr was allocated or
- * registered.
- * The type of \p data must be ::CUcontext *.
- *
- * If \p ptr was not allocated by, mapped by, or registered with
- * a ::CUcontext which uses unified virtual addressing then
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE:
- *
- * Returns in \p *data the physical memory type of the memory that
- * \p ptr addresses as a ::CUmemorytype enumerated value.
- * The type of \p data must be unsigned int *.
- *
- * If \p ptr addresses device memory then \p *data is set to
- * ::CU_MEMORYTYPE_DEVICE. The particular ::CUdevice on which the
- * memory resides is the ::CUdevice of the ::CUcontext returned by the
- * ::CU_POINTER_ATTRIBUTE_CONTEXT attribute of \p ptr.
- *
- * If \p ptr addresses host memory then \p *data is set to
- * ::CU_MEMORYTYPE_HOST.
- *
- * If \p ptr was not allocated by, mapped by, or registered with
- * a ::CUcontext which uses unified virtual addressing then
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * If the current ::CUcontext does not support unified virtual
- * addressing then ::CUDA_ERROR_INVALID_CONTEXT is returned.
- *
- * - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER:
- *
- * Returns in \p *data the device pointer value through which
- * \p ptr may be accessed by kernels running in the current
- * ::CUcontext.
- * The type of \p data must be CUdeviceptr *.
- *
- * If there exists no device pointer value through which
- * kernels running in the current ::CUcontext may access
- * \p ptr then ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * If there is no current ::CUcontext then
- * ::CUDA_ERROR_INVALID_CONTEXT is returned.
- *
- * Except in the exceptional disjoint addressing cases discussed
- * below, the value returned in \p *data will equal the input
- * value \p ptr.
- *
- * - ::CU_POINTER_ATTRIBUTE_HOST_POINTER:
- *
- * Returns in \p *data the host pointer value through which
- * \p ptr may be accessed by the host program.
- * The type of \p data must be void **.
- * If there exists no host pointer value through which
- * the host program may directly access \p ptr then
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * Except in the exceptional disjoint addressing cases discussed
- * below, the value returned in \p *data will equal the input
- * value \p ptr.
- *
- * - ::CU_POINTER_ATTRIBUTE_P2P_TOKENS:
- *
- * Returns in \p *data two tokens for use with the nv-p2p.h Linux
- * kernel interface. \p data must be a struct of type
- * CUDA_POINTER_ATTRIBUTE_P2P_TOKENS.
- *
- * \p ptr must be a pointer to memory obtained from ::cuMemAlloc().
- * Note that p2pToken and vaSpaceToken are only valid for the
- * lifetime of the source allocation. A subsequent allocation at
- * the same address may return completely different tokens.
- * Querying this attribute has a side effect of setting the attribute
- * ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS for the region of memory that
- * \p ptr points to.
- *
- * - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS:
- *
- * A boolean attribute which when set, ensures that synchronous memory operations
- * initiated on the region of memory that \p ptr points to will always synchronize.
- * See further documentation in the section titled "API synchronization behavior"
- * to learn more about cases when synchronous memory operations can
- * exhibit asynchronous behavior.
- *
- * - ::CU_POINTER_ATTRIBUTE_BUFFER_ID:
- *
- * Returns in \p *data a buffer ID which is guaranteed to be unique within the process.
- * \p data must point to an unsigned long long.
- *
- * \p ptr must be a pointer to memory obtained from a CUDA memory allocation API.
- * Every memory allocation from any of the CUDA memory allocation APIs will
- * have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs
- * from previous freed allocations. IDs are only unique within a single process.
- *
- *
- * - ::CU_POINTER_ATTRIBUTE_IS_MANAGED:
- *
- * Returns in \p *data a boolean that indicates whether the pointer points to
- * managed memory or not.
- *
- * If \p ptr is not a valid CUDA pointer then ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * - ::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL:
- *
- * Returns in \p *data an integer representing a device ordinal of a device against
- * which the memory was allocated or registered.
- *
- * - ::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE:
- *
- * Returns in \p *data a boolean that indicates if this pointer maps to
- * an allocation that is suitable for ::cudaIpcGetMemHandle.
- *
- * - ::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR:
- *
- * Returns in \p *data the starting address for the allocation referenced
- * by the device pointer \p ptr. Note that this is not necessarily the
- * address of the mapped region, but the address of the mappable address
- * range \p ptr references (e.g. from ::cuMemAddressReserve).
- *
- * - ::CU_POINTER_ATTRIBUTE_RANGE_SIZE:
- *
- * Returns in \p *data the size for the allocation referenced by the device
- * pointer \p ptr. Note that this is not necessarily the size of the mapped
- * region, but the size of the mappable address range \p ptr references
- * (e.g. from ::cuMemAddressReserve). To retrieve the size of the mapped
- * region, see ::cuMemGetAddressRange.
- *
- * - ::CU_POINTER_ATTRIBUTE_MAPPED:
- *
- * Returns in \p *data a boolean that indicates if this pointer is in a
- * valid address range that is mapped to a backing allocation.
- *
- * - ::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES:
- *
- * Returns a bitmask of the allowed handle types for an allocation that may
- * be passed to ::cuMemExportToShareableHandle.
- *
- * - ::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE:
- *
- * Returns in \p *data the handle to the mempool that the allocation was obtained from.
- *
- * - ::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE:
- *
- * Returns in \p *data a boolean that indicates whether the pointer points
- * to memory that is capable of being used for hardware accelerated
- * decompression.
- *
- * \par
- *
- * Note that for most allocations in the unified virtual address space
- * the host and device pointer for accessing the allocation will be the
- * same. The exceptions to this are
- * - user memory registered using ::cuMemHostRegister
- * - host memory allocated using ::cuMemHostAlloc with the
- * ::CU_MEMHOSTALLOC_WRITECOMBINED flag
- * For these types of allocation there will exist separate, disjoint host
- * and device addresses for accessing the allocation. In particular
- * - The host address will correspond to an invalid unmapped device address
- * (which will result in an exception if accessed from the device)
- * - The device address will correspond to an invalid unmapped host address
- * (which will result in an exception if accessed from the host).
- * For these types of allocations, querying ::CU_POINTER_ATTRIBUTE_HOST_POINTER
- * and ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host
- * and device addresses from either address.
- *
- * \param data - Returned pointer attribute value
- * \param attribute - Pointer attribute to query
- * \param ptr - Pointer
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuPointerSetAttribute,
- * ::cuMemAlloc,
- * ::cuMemFree,
- * ::cuMemAllocHost,
- * ::cuMemFreeHost,
- * ::cuMemHostAlloc,
- * ::cuMemHostRegister,
- * ::cuMemHostUnregister,
- * ::cudaPointerGetAttributes
- */
- CUresult CUDAAPI cuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr);
- /**
- * \brief Prefetches memory to the specified destination device
- *
- * Note there is a later version of this API, ::cuMemPrefetchAsync_v2. It will
- * supplant this version in 13.0, which is retained for minor version compatibility.
- *
- * Prefetches memory to the specified destination device. \p devPtr is the
- * base device pointer of the memory to be prefetched and \p dstDevice is the
- * destination device. \p count specifies the number of bytes to copy. \p hStream
- * is the stream in which the operation is enqueued. The memory range must refer
- * to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables
- * or it may also refer to system-allocated memory on systems with non-zero
- * CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- *
- * Passing in CU_DEVICE_CPU for \p dstDevice will prefetch the data to host memory. If
- * \p dstDevice is a GPU, then the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
- * must be non-zero. Additionally, \p hStream must be associated with a device that has a
- * non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- *
- * The start address and end address of the memory range will be rounded down and rounded up
- * respectively to be aligned to CPU page size before the prefetch operation is enqueued
- * in the stream.
- *
- * If no physical memory has been allocated for this region, then this memory region
- * will be populated and mapped on the destination device. If there's insufficient
- * memory to prefetch the desired region, the Unified Memory driver may evict pages from other
- * ::cuMemAllocManaged allocations to host memory in order to make room. Device memory
- * allocated using ::cuMemAlloc or ::cuArrayCreate will not be evicted.
- *
- * By default, any mappings to the previous location of the migrated pages are removed and
- * mappings for the new location are only setup on \p dstDevice. The exact behavior however
- * also depends on the settings applied to this memory range via ::cuMemAdvise as described
- * below:
- *
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY was set on any subset of this memory range,
- * then that subset will create a read-only copy of the pages on \p dstDevice.
- *
- * If ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION was called on any subset of this memory
- * range, then the pages will be migrated to \p dstDevice even if \p dstDevice is not the
- * preferred location of any pages in the memory range.
- *
- * If ::CU_MEM_ADVISE_SET_ACCESSED_BY was called on any subset of this memory range,
- * then mappings to those pages from all the appropriate processors are updated to
- * refer to the new location if establishing such a mapping is possible. Otherwise,
- * those mappings are cleared.
- *
- * Note that this API is not required for functionality and only serves to improve performance
- * by allowing the application to migrate data to a suitable location before it is accessed.
- * Memory accesses to this range are always coherent and are allowed even when the data is
- * actively being migrated.
- *
- * Note that this function is asynchronous with respect to the host and all work
- * on other devices.
- *
- * \param devPtr - Pointer to be prefetched
- * \param count - Size in bytes
- * \param dstDevice - Destination device to prefetch to
- * \param hStream - Stream to enqueue prefetch operation
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync,
- * ::cuMemcpy3DPeerAsync, ::cuMemAdvise, ::cuMemPrefetchAsync_v2,
- * ::cudaMemPrefetchAsync_v2
- */
- CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
- /**
- * \brief Prefetches memory to the specified destination location
- *
- * Prefetches memory to the specified destination location. \p devPtr is the
- * base device pointer of the memory to be prefetched and \p location specifies the
- * destination location. \p count specifies the number of bytes to copy. \p hStream
- * is the stream in which the operation is enqueued. The memory range must refer
- * to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables.
- *
- * Specifying ::CU_MEM_LOCATION_TYPE_DEVICE for ::CUmemLocation::type will prefetch memory to GPU
- * specified by device ordinal ::CUmemLocation::id which must have non-zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Additionally, \p hStream must be associated with a device
- * that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * Specifying ::CU_MEM_LOCATION_TYPE_HOST as ::CUmemLocation::type will prefetch data to host memory.
- * Applications can request prefetching memory to a specific host NUMA node by specifying
- * ::CU_MEM_LOCATION_TYPE_HOST_NUMA for ::CUmemLocation::type and a valid host NUMA node id in ::CUmemLocation::id.
- * Users can also request prefetching memory to the host NUMA node closest to the current thread's CPU by specifying
- * ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT for ::CUmemLocation::type. Note when ::CUmemLocation::type is either
- * ::CU_MEM_LOCATION_TYPE_HOST or ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, ::CUmemLocation::id will be ignored.
- *
- * The start address and end address of the memory range will be rounded down and rounded up
- * respectively to be aligned to CPU page size before the prefetch operation is enqueued
- * in the stream.
- *
- * If no physical memory has been allocated for this region, then this memory region
- * will be populated and mapped on the destination device. If there's insufficient
- * memory to prefetch the desired region, the Unified Memory driver may evict pages from other
- * ::cuMemAllocManaged allocations to host memory in order to make room. Device memory
- * allocated using ::cuMemAlloc or ::cuArrayCreate will not be evicted.
- *
- * By default, any mappings to the previous location of the migrated pages are removed and
- * mappings for the new location are only setup on the destination location. The exact behavior however
- * also depends on the settings applied to this memory range via ::cuMemAdvise as described
- * below:
- *
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY was set on any subset of this memory range,
- * then that subset will create a read-only copy of the pages on the destination location.
- * If however the destination location is a host NUMA node, then any pages of that subset
- * that are already in another host NUMA node will be transferred to the destination.
- *
- * If ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION was called on any subset of this memory
- * range, then the pages will be migrated to \p location even if \p location is not the
- * preferred location of any pages in the memory range.
- *
- * If ::CU_MEM_ADVISE_SET_ACCESSED_BY was called on any subset of this memory range,
- * then mappings to those pages from all the appropriate processors are updated to
- * refer to the new location if establishing such a mapping is possible. Otherwise,
- * those mappings are cleared.
- *
- * Note that this API is not required for functionality and only serves to improve performance
- * by allowing the application to migrate data to a suitable location before it is accessed.
- * Memory accesses to this range are always coherent and are allowed even when the data is
- * actively being migrated.
- *
- * Note that this function is asynchronous with respect to the host and all work
- * on other devices.
- *
- * \param devPtr - Pointer to be prefetched
- * \param count - Size in bytes
- * \param location - Destination location to prefetch to
- * \param flags - flags for future use, must be zero now.
- * \param hStream - Stream to enqueue prefetch operation
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync,
- * ::cuMemcpy3DPeerAsync, ::cuMemAdvise, ::cuMemPrefetchAsync,
- * ::cudaMemPrefetchAsync_v2
- */
- CUresult CUDAAPI cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream);
- /**
- * \brief Advise about the usage of a given memory range
- *
- * Note there is a later version of this API, ::cuMemAdvise_v2. It will
- * supplant this version in 13.0, which is retained for minor version compatibility.
- *
- * Advise the Unified Memory subsystem about the usage pattern for the memory range
- * starting at \p devPtr with a size of \p count bytes. The start address and end address of the memory
- * range will be rounded down and rounded up respectively to be aligned to CPU page size before the
- * advice is applied. The memory range must refer to managed memory allocated via ::cuMemAllocManaged
- * or declared via __managed__ variables. The memory range could also refer to system-allocated pageable
- * memory provided it represents a valid, host-accessible region of memory and all additional constraints
- * imposed by \p advice as outlined below are also satisfied. Specifying an invalid system-allocated pageable
- * memory range results in an error being returned.
- *
- * The \p advice parameter can take the following values:
- * - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read
- * from and only occasionally written to. Any read accesses from any processor to this region will create a
- * read-only copy of at least the accessed pages in that processor's memory. Additionally, if ::cuMemPrefetchAsync
- * is called on this region, it will create a read-only copy of the data on the destination processor.
- * If any processor writes to this region, all copies of the corresponding page will be invalidated
- * except for the one where the write occurred. The \p device argument is ignored for this advice.
- * Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU
- * that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * Also, if a context is created on a device that does not have the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS set, then read-duplication will not occur until
- * all such contexts are destroyed.
- * If the memory region refers to valid system-allocated pageable memory, then the accessing device must
- * have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS for a read-only
- * copy to be created on that device. Note however that if the accessing device also has a non-zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, then setting this advice
- * will not create a read-only copy when that device accesses this memory region.
- *
- * - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY and also prevents the
- * Unified Memory driver from attempting heuristic read-duplication on the memory range. Any read-duplicated
- * copies of the data will be collapsed into a single copy. The location for the collapsed
- * copy will be the preferred location if the page has a preferred location and one of the read-duplicated
- * copies was resident at that location. Otherwise, the location chosen is arbitrary.
- *
- * - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the
- * data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the
- * preferred location as host memory. If \p device is a GPU, then it must have a non-zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Setting the preferred location
- * does not cause data to migrate to that location immediately. Instead, it guides the migration policy
- * when a fault occurs on that memory region. If the data is already in its preferred location and the
- * faulting processor can establish a mapping without requiring the data to be migrated, then
- * data migration will be avoided. On the other hand, if the data is not in its preferred location
- * or if a direct mapping cannot be established, then it will be migrated to the processor accessing
- * it. It is important to note that setting the preferred location does not prevent data prefetching
- * done using ::cuMemPrefetchAsync.
- * Having a preferred location can override the page thrash detection and resolution logic in the Unified
- * Memory driver. Normally, if a page is detected to be constantly thrashing between for example host and device
- * memory, the page may eventually be pinned to host memory by the Unified Memory driver. But
- * if the preferred location is set as device memory, then the page will continue to thrash indefinitely.
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the
- * policies associated with that advice will override the policies of this advice, unless read accesses from
- * \p device will not result in a read-only copy being created on that device as outlined in description for
- * the advice ::CU_MEM_ADVISE_SET_READ_MOSTLY.
- * If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero
- * value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- *
- * - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION
- * and changes the preferred location to none.
- *
- * - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device.
- * Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. If \p device is a GPU, then
- * the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS must be non-zero.
- * This advice does not cause data migration and has no impact on the location of the data per se. Instead,
- * it causes the data to always be mapped in the specified processor's page tables, as long as the
- * location of the data permits a mapping to be established. If the data gets migrated for any reason,
- * the mappings are updated accordingly.
- * This advice is recommended in scenarios where data locality is not important, but avoiding faults is.
- * Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the
- * data located on one GPU is occasionally accessed by peer GPUs. In such scenarios, migrating data
- * over to the other GPUs is not as important because the accesses are infrequent and the overhead of
- * migration may be too high. But preventing faults can still help improve performance, and so having
- * a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated
- * to host memory because the CPU typically cannot access device memory directly. Any GPU that had the
- * ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the
- * page in host memory.
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the
- * policies associated with that advice will override the policies of this advice. Additionally, if the
- * preferred location of this memory region or any subset of it is also \p device, then the policies
- * associated with ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION will override the policies of this advice.
- * If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero
- * value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. Additionally, if \p device has
- * a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
- * then this call has no effect.
- *
- * - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of ::CU_MEM_ADVISE_SET_ACCESSED_BY. Any mappings to
- * the data from \p device may be removed at any time causing accesses to result in non-fatal page faults.
- * If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero
- * value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. Additionally, if \p device has
- * a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
- * then this call has no effect.
- *
- * \param devPtr - Pointer to memory to set the advice for
- * \param count - Size in bytes of the memory range
- * \param advice - Advice to be applied for the specified memory range
- * \param device - Device to apply the advice for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync,
- * ::cuMemcpy3DPeerAsync, ::cuMemPrefetchAsync, ::cuMemAdvise_v2,
- * ::cudaMemAdvise
- */
- CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device);
- /**
- * \brief Advise about the usage of a given memory range
- *
- * Advise the Unified Memory subsystem about the usage pattern for the memory range
- * starting at \p devPtr with a size of \p count bytes. The start address and end address of the memory
- * range will be rounded down and rounded up respectively to be aligned to CPU page size before the
- * advice is applied. The memory range must refer to managed memory allocated via ::cuMemAllocManaged
- * or declared via __managed__ variables. The memory range could also refer to system-allocated pageable
- * memory provided it represents a valid, host-accessible region of memory and all additional constraints
- * imposed by \p advice as outlined below are also satisfied. Specifying an invalid system-allocated pageable
- * memory range results in an error being returned.
- *
- * The \p advice parameter can take the following values:
- * - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read
- * from and only occasionally written to. Any read accesses from any processor to this region will create a
- * read-only copy of at least the accessed pages in that processor's memory. Additionally, if ::cuMemPrefetchAsync
- * or ::cuMemPrefetchAsync_v2 is called on this region, it will create a read-only copy of the data on the destination processor.
- * If the target location for ::cuMemPrefetchAsync_v2 is a host NUMA node and a read-only copy already exists on
- * another host NUMA node, that copy will be migrated to the targeted host NUMA node.
- * If any processor writes to this region, all copies of the corresponding page will be invalidated
- * except for the one where the write occurred. If the writing processor is the CPU and the preferred location of
- * the page is a host NUMA node, then the page will also be migrated to that host NUMA node. The \p location argument is ignored for this advice.
- * Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU
- * that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * Also, if a context is created on a device that does not have the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS set, then read-duplication will not occur until
- * all such contexts are destroyed.
- * If the memory region refers to valid system-allocated pageable memory, then the accessing device must
- * have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS for a read-only
- * copy to be created on that device. Note however that if the accessing device also has a non-zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, then setting this advice
- * will not create a read-only copy when that device accesses this memory region.
- *
- * - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY and also prevents the
- * Unified Memory driver from attempting heuristic read-duplication on the memory range. Any read-duplicated
- * copies of the data will be collapsed into a single copy. The location for the collapsed
- * copy will be the preferred location if the page has a preferred location and one of the read-duplicated
- * copies was resident at that location. Otherwise, the location chosen is arbitrary.
- * Note: The \p location argument is ignored for this advice.
- *
- * - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the
- * data to be the memory belonging to \p location. When ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_HOST,
- * ::CUmemLocation::id is ignored and the preferred location is set to be host memory. To set the preferred location
- * to a specific host NUMA node, applications must set ::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and
- * ::CUmemLocation::id must specify the NUMA ID of the host NUMA node. If ::CUmemLocation::type is set to ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
- * ::CUmemLocation::id will be ignored and the host NUMA node closest to the calling thread's CPU will be used as the preferred location.
- * If ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE, then ::CUmemLocation::id must be a valid device ordinal
- * and the device must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * Setting the preferred location does not cause data to migrate to that location immediately. Instead, it guides the migration policy
- * when a fault occurs on that memory region. If the data is already in its preferred location and the
- * faulting processor can establish a mapping without requiring the data to be migrated, then
- * data migration will be avoided. On the other hand, if the data is not in its preferred location
- * or if a direct mapping cannot be established, then it will be migrated to the processor accessing
- * it. It is important to note that setting the preferred location does not prevent data prefetching
- * done using ::cuMemPrefetchAsync.
- * Having a preferred location can override the page thrash detection and resolution logic in the Unified
- * Memory driver. Normally, if a page is detected to be constantly thrashing between for example host and device
- * memory, the page may eventually be pinned to host memory by the Unified Memory driver. But
- * if the preferred location is set as device memory, then the page will continue to thrash indefinitely.
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the
- * policies associated with that advice will override the policies of this advice, unless read accesses from
- * \p location will not result in a read-only copy being created on that processor as outlined in description for
- * the advice ::CU_MEM_ADVISE_SET_READ_MOSTLY.
- * If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE,
- * then ::CUmemLocation::id must be a valid device that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- *
- * - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION
- * and changes the preferred location to none. The \p location argument is ignored for this advice.
- *
- * - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by processor \p location.
- * The ::CUmemLocation::type must be either ::CU_MEM_LOCATION_TYPE_DEVICE with ::CUmemLocation::id representing a valid device
- * ordinal or ::CU_MEM_LOCATION_TYPE_HOST and ::CUmemLocation::id will be ignored. All other location types are invalid.
- * If ::CUmemLocation::id is a GPU, then the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS must be non-zero.
- * This advice does not cause data migration and has no impact on the location of the data per se. Instead,
- * it causes the data to always be mapped in the specified processor's page tables, as long as the
- * location of the data permits a mapping to be established. If the data gets migrated for any reason,
- * the mappings are updated accordingly.
- * This advice is recommended in scenarios where data locality is not important, but avoiding faults is.
- * Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the
- * data located on one GPU is occasionally accessed by peer GPUs. In such scenarios, migrating data
- * over to the other GPUs is not as important because the accesses are infrequent and the overhead of
- * migration may be too high. But preventing faults can still help improve performance, and so having
- * a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated
- * to host memory because the CPU typically cannot access device memory directly. Any GPU that had the
- * ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the
- * page in host memory.
- * If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the
- * policies associated with that advice will override the policies of this advice. Additionally, if the
- * preferred location of this memory region or any subset of it is also \p location, then the policies
- * associated with ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION will override the policies of this advice.
- * If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE
- * then device in ::CUmemLocation::id must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- * Additionally, if ::CUmemLocation::id has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
- * then this call has no effect.
- *
- * - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of ::CU_MEM_ADVISE_SET_ACCESSED_BY. Any mappings to
- * the data from \p location may be removed at any time causing accesses to result in non-fatal page faults.
- * If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE
- * then device in ::CUmemLocation::id must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- * Additionally, if ::CUmemLocation::id has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
- * then this call has no effect.
- *
- * \param devPtr - Pointer to memory to set the advice for
- * \param count - Size in bytes of the memory range
- * \param advice - Advice to be applied for the specified memory range
- * \param location - location to apply the advice for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync,
- * ::cuMemcpy3DPeerAsync, ::cuMemPrefetchAsync, ::cuMemAdvise,
- * ::cudaMemAdvise
- */
- CUresult CUDAAPI cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location);
- /**
- * \brief Query an attribute of a given memory range
- *
- * Query an attribute about the memory range starting at \p devPtr with a size of \p count bytes. The
- * memory range must refer to managed memory allocated via ::cuMemAllocManaged or declared via
- * __managed__ variables.
- *
- * The \p attribute parameter can take the following values:
- * - ::CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: If this attribute is specified, \p data will be interpreted
- * as a 32-bit integer, and \p dataSize must be 4. The result returned will be 1 if all pages in the given
- * memory range have read-duplication enabled, or 0 otherwise.
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: If this attribute is specified, \p data will be
- * interpreted as a 32-bit integer, and \p dataSize must be 4. The result returned will be a GPU device
- * id if all pages in the memory range have that GPU as their preferred location, or it will be CU_DEVICE_CPU
- * if all pages in the memory range have the CPU as their preferred location, or it will be CU_DEVICE_INVALID
- * if either all the pages don't have the same preferred location or some of the pages don't have a
- * preferred location at all. Note that the actual location of the pages in the memory range at the time of
- * the query may be different from the preferred location.
- * - ::CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: If this attribute is specified, \p data will be interpreted
- * as an array of 32-bit integers, and \p dataSize must be a non-zero multiple of 4. The result returned
- * will be a list of device ids that had ::CU_MEM_ADVISE_SET_ACCESSED_BY set for that entire memory range.
- * If any device does not have that advice set for the entire memory range, that device will not be included.
- * If \p data is larger than the number of devices that have that advice set for that memory range,
- * CU_DEVICE_INVALID will be returned in all the extra space provided. For ex., if \p dataSize is 12
- * (i.e. \p data has 3 elements) and only device 0 has the advice set, then the result returned will be
- * { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If \p data is smaller than the number of devices that have
- * that advice set, then only as many devices will be returned as can fit in the array. There is no
- * guarantee on which specific devices will be returned, however.
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: If this attribute is specified, \p data will be
- * interpreted as a 32-bit integer, and \p dataSize must be 4. The result returned will be the last location
- * to which all pages in the memory range were prefetched explicitly via ::cuMemPrefetchAsync. This will either be
- * a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU
- * respectively. If any page in the memory range was never explicitly prefetched or if all pages were not
- * prefetched to the same location, CU_DEVICE_INVALID will be returned. Note that this simply returns the
- * last location that the application requested to prefetch the memory range to. It gives no indication as to
- * whether the prefetch operation to that location has completed or even begun.
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE: If this attribute is specified, \p data will be
- * interpreted as a ::CUmemLocationType, and \p dataSize must be sizeof(CUmemLocationType). The ::CUmemLocationType returned will be
- * ::CU_MEM_LOCATION_TYPE_DEVICE if all pages in the memory range have the same GPU as their preferred location, or ::CUmemLocationType
- * will be ::CU_MEM_LOCATION_TYPE_HOST if all pages in the memory range have the CPU as their preferred location, or it will be ::CU_MEM_LOCATION_TYPE_HOST_NUMA
- * if all the pages in the memory range have the same host NUMA node ID as their preferred location or it will be ::CU_MEM_LOCATION_TYPE_INVALID
- * if either all the pages don't have the same preferred location or some of the pages don't have a preferred location at all.
- * Note that the actual location type of the pages in the memory range at the time of the query may be different from the preferred location type.
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID: If this attribute is specified, \p data will be
- * interpreted as a 32-bit integer, and \p dataSize must be 4. If the ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE query for the same address range
- * returns ::CU_MEM_LOCATION_TYPE_DEVICE, it will be a valid device ordinal or if it returns ::CU_MEM_LOCATION_TYPE_HOST_NUMA, it will be a valid host NUMA node ID
- * or if it returns any other location type, the id should be ignored.
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE: If this attribute is specified, \p data will be
- * interpreted as a ::CUmemLocationType, and \p dataSize must be sizeof(CUmemLocationType). The result returned will be the last location
- * to which all pages in the memory range were prefetched explicitly via ::cuMemPrefetchAsync. The ::CUmemLocationType returned
- * will be ::CU_MEM_LOCATION_TYPE_DEVICE if the last prefetch location was a GPU or ::CU_MEM_LOCATION_TYPE_HOST if it was the CPU or ::CU_MEM_LOCATION_TYPE_HOST_NUMA if
- * the last prefetch location was a specific host NUMA node. If any page in the memory range was never explicitly prefetched or if all pages were not
- * prefetched to the same location, ::CUmemLocationType will be ::CU_MEM_LOCATION_TYPE_INVALID.
- * Note that this simply returns the last location type that the application requested to prefetch the memory range to. It gives no indication as to
- * whether the prefetch operation to that location has completed or even begun.
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID: If this attribute is specified, \p data will be
- * interpreted as a 32-bit integer, and \p dataSize must be 4. If the ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE query for the same address range
- * returns ::CU_MEM_LOCATION_TYPE_DEVICE, it will be a valid device ordinal or if it returns ::CU_MEM_LOCATION_TYPE_HOST_NUMA, it will be a valid host NUMA node ID
- * or if it returns any other location type, the id should be ignored.
- *
- * \param data - A pointer to a memory location where the result
- * of the attribute query will be written to.
- * \param dataSize - Size, in bytes, of the memory location pointed to by \p data
- * \param attribute - The attribute to query
- * \param devPtr - Start of the range to query
- * \param count - Size of the range to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- * \note_async
- * \note_null_stream
- *
- * \sa ::cuMemRangeGetAttributes, ::cuMemPrefetchAsync,
- * ::cuMemAdvise,
- * ::cudaMemRangeGetAttribute
- */
- CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count);
- /**
- * \brief Query attributes of a given memory range.
- *
- * Query attributes of the memory range starting at \p devPtr with a size of \p count bytes. The
- * memory range must refer to managed memory allocated via ::cuMemAllocManaged or declared via
- * __managed__ variables. The \p attributes array will be interpreted to have \p numAttributes
- * entries. The \p dataSizes array will also be interpreted to have \p numAttributes entries.
- * The results of the query will be stored in \p data.
- *
- * The list of supported attributes is given below. Please refer to ::cuMemRangeGetAttribute for
- * attribute descriptions and restrictions.
- *
- * - ::CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION
- * - ::CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE
- * - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE
- * - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID
- *
- * \param data - A two-dimensional array containing pointers to memory
- * locations where the result of each attribute query will be written to.
- * \param dataSizes - Array containing the size of each result
- * (interpreted to have \p numAttributes entries)
- * \param attributes - An array of attributes to query
- * (numAttributes and the number of attributes in this array should match)
- * \param numAttributes - Number of attributes to query
- * \param devPtr - Start of the range to query
- * \param count - Size of the range to query, in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa ::cuMemRangeGetAttribute, ::cuMemAdvise,
- * ::cuMemPrefetchAsync,
- * ::cudaMemRangeGetAttributes
- */
- CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count);
- /**
- * \brief Set attributes on a previously allocated memory region
- *
- * The supported attributes are:
- *
- * - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS:
- *
- * A boolean attribute that can either be set (1) or unset (0). When set,
- * the region of memory that \p ptr points to is guaranteed to always synchronize
- * memory operations that are synchronous. If there are some previously initiated
- * synchronous memory operations that are pending when this attribute is set, the
- * function does not return until those memory operations are complete.
- * See further documentation in the section titled "API synchronization behavior"
- * to learn more about cases when synchronous memory operations can
- * exhibit asynchronous behavior.
- * \p value is interpreted as a pointer to an unsigned integer holding the value
- * to which this attribute is to be set.
- *
- * \param value - Pointer to memory containing the value to be set
- * \param attribute - Pointer attribute to set
- * \param ptr - Pointer to a memory region allocated using CUDA memory allocation APIs
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa ::cuPointerGetAttribute,
- * ::cuPointerGetAttributes,
- * ::cuMemAlloc,
- * ::cuMemFree,
- * ::cuMemAllocHost,
- * ::cuMemFreeHost,
- * ::cuMemHostAlloc,
- * ::cuMemHostRegister,
- * ::cuMemHostUnregister
- */
- CUresult CUDAAPI cuPointerSetAttribute(const void *value, CUpointer_attribute attribute, CUdeviceptr ptr);
- /**
- * \brief Returns information about a pointer.
- *
- * The supported attributes are (refer to ::cuPointerGetAttribute for attribute descriptions and restrictions):
- *
- * - ::CU_POINTER_ATTRIBUTE_CONTEXT
- * - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE
- * - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER
- * - ::CU_POINTER_ATTRIBUTE_HOST_POINTER
- * - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS
- * - ::CU_POINTER_ATTRIBUTE_BUFFER_ID
- * - ::CU_POINTER_ATTRIBUTE_IS_MANAGED
- * - ::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
- * - ::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR
- * - ::CU_POINTER_ATTRIBUTE_RANGE_SIZE
- * - ::CU_POINTER_ATTRIBUTE_MAPPED
- * - ::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE
- * - ::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES
- * - ::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE
- * - ::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE
- *
- * \param numAttributes - Number of attributes to query
- * \param attributes - An array of attributes to query
- * (numAttributes and the number of attributes in this array should match)
- * \param data - A two-dimensional array containing pointers to memory
- * locations where the result of each attribute query will be written to.
- * \param ptr - Pointer to query
- *
- * Unlike ::cuPointerGetAttribute, this function will not return an error when the \p ptr
- * encountered is not a valid CUDA pointer. Instead, the attributes are assigned default NULL values
- * and ::CUDA_SUCCESS is returned.
- *
- * If \p ptr was not allocated by, mapped by, or registered with a ::CUcontext which uses UVA
- * (Unified Virtual Addressing), ::CUDA_ERROR_INVALID_CONTEXT is returned.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuPointerGetAttribute,
- * ::cuPointerSetAttribute,
- * ::cudaPointerGetAttributes
- */
- CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute *attributes, void **data, CUdeviceptr ptr);
- /** @} */ /* END CUDA_UNIFIED */
- /**
- * \defgroup CUDA_STREAM Stream Management
- *
- * ___MANBRIEF___ stream management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the stream management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Create a stream
- *
- * Creates a stream and returns a handle in \p phStream. The \p Flags argument
- * determines behaviors of the stream.
- *
- * Valid values for \p Flags are:
- * - ::CU_STREAM_DEFAULT: Default stream creation flag.
- * - ::CU_STREAM_NON_BLOCKING: Specifies that work running in the created
- * stream may run concurrently with work in stream 0 (the NULL stream), and that
- * the created stream should perform no implicit synchronization with stream 0.
- *
- * \param phStream - Returned newly created stream
- * \param Flags - Parameters for stream creation
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreateWithPriority,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetPriority,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreate,
- * ::cudaStreamCreateWithFlags
- */
- CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags);
- /**
- * \brief Create a stream with the given priority
- *
- * Creates a stream with the specified priority and returns a handle in \p phStream.
- * This affects the scheduling priority of work in the stream. Priorities provide a
- * hint to preferentially run work with higher priority when possible, but do
- * not preempt already-running work or provide any other functional guarantee on
- * execution order.
- *
- * \p priority follows a convention where lower numbers represent higher priorities.
- * '0' represents default priority. The range of meaningful numerical priorities can
- * be queried using ::cuCtxGetStreamPriorityRange. If the specified priority is
- * outside the numerical range returned by ::cuCtxGetStreamPriorityRange,
- * it will automatically be clamped to the lowest or the highest number in the range.
- *
- * \param phStream - Returned newly created stream
- * \param flags - Flags for stream creation. See ::cuStreamCreate for a list of
- * valid flags
- * \param priority - Stream priority. Lower numbers represent higher priorities.
- * See ::cuCtxGetStreamPriorityRange for more information about
- * meaningful stream priorities that can be passed.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \note Stream priorities are supported only on GPUs
- * with compute capability 3.5 or higher.
- *
- * \note In the current implementation, only compute kernels launched in
- * priority streams are affected by the stream's priority. Stream priorities have
- * no effect on host-to-device and device-to-host memory operations.
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetPriority,
- * ::cuCtxGetStreamPriorityRange,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreateWithPriority
- */
- CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, unsigned int flags, int priority);
- /**
- * \brief Query the priority of a given stream
- *
- * Query the priority of a stream created using ::cuStreamCreate, ::cuStreamCreateWithPriority or ::cuGreenCtxStreamCreate
- * and return the priority in \p priority. Note that if the stream was created with a
- * priority outside the numerical range returned by ::cuCtxGetStreamPriorityRange,
- * this function returns the clamped priority.
- * See ::cuStreamCreateWithPriority for details about priority clamping.
- *
- * \param hStream - Handle to the stream to be queried
- * \param priority - Pointer to a signed integer in which the stream's priority is returned
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuStreamCreateWithPriority,
- * ::cuGreenCtxStreamCreate,
- * ::cuCtxGetStreamPriorityRange,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cudaStreamGetPriority
- */
- CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority);
- /**
- * \brief Returns the device handle of the stream
- *
- * Returns in \p *device the device handle of the stream \p hStream.
- *
- * \param hStream - Handle to the stream to be queried
- * \param device - Returns the device to which the stream belongs
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa
- * ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetFlags
- */
- CUresult CUDAAPI cuStreamGetDevice(CUstream hStream, CUdevice *device);
- /**
- * \brief Query the flags of a given stream
- *
- * Query the flags of a stream created using ::cuStreamCreate, ::cuStreamCreateWithPriority or ::cuGreenCtxStreamCreate
- * and return the flags in \p flags.
- *
- * \param hStream - Handle to the stream to be queried
- * \param flags - Pointer to an unsigned integer in which the stream's flags are returned
- * The value returned in \p flags is a bitwise OR of all flags that
- * were used while creating this stream. See ::cuStreamCreate for the list
- * of valid flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetPriority,
- * ::cudaStreamGetFlags,
- * ::cuStreamGetDevice
- */
- CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags);
- /**
- * \brief Returns the unique Id associated with the stream handle supplied
- *
- * Returns in \p streamId the unique Id which is associated with the given stream handle.
- * The Id is unique for the life of the program.
- *
- * The stream handle \p hStream can refer to any of the following:
- * <ul>
- * <li>a stream created via any of the CUDA driver APIs such as ::cuStreamCreate
- * and ::cuStreamCreateWithPriority, or their runtime API equivalents such as
- * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority.
- * Passing an invalid handle will result in undefined behavior.</li>
- * <li>any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY and
- * ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are also accepted,
- * which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread respectively.</li>
- * </ul>
- *
- * \param hStream - Handle to the stream to be queried
- * \param streamId - Pointer to an unsigned 64-bit integer in which the stream's Id is returned
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuStreamGetPriority,
- * ::cudaStreamGetId
- */
- CUresult CUDAAPI cuStreamGetId(CUstream hStream, unsigned long long *streamId);
- /**
- * \brief Query the context associated with a stream
- *
- * Returns the CUDA context that the stream is associated with.
- *
- * Note there is a later version of this API, ::cuStreamGetCtx_v2. It will
- * supplant this version in CUDA 13.0. It is recommended to use ::cuStreamGetCtx_v2
- * until then, as this version will return ::CUDA_ERROR_NOT_SUPPORTED for streams
- * created via the API ::cuGreenCtxStreamCreate.
- *
- * The stream handle \p hStream can refer to any of the following:
- * <ul>
- * <li>a stream created via any of the CUDA driver APIs such as ::cuStreamCreate
- * and ::cuStreamCreateWithPriority, or their runtime API equivalents such as
- * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority.
- * The returned context is the context that was active in the calling thread when the
- * stream was created. Passing an invalid handle will result in undefined behavior.</li>
- * <li>any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY and
- * ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are also accepted,
- * which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread respectively.
- * Specifying any of the special handles will return the context current to the
- * calling thread. If no context is current to the calling thread,
- * ::CUDA_ERROR_INVALID_CONTEXT is returned.</li>
- * </ul>
- *
- * \param hStream - Handle to the stream to be queried
- * \param pctx - Returned context associated with the stream
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreateWithPriority,
- * ::cuStreamGetPriority,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreate,
- * ::cuStreamGetCtx_v2,
- * ::cudaStreamCreateWithFlags
- */
- CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
- /**
- * \brief Query the contexts associated with a stream
- *
- * Returns the contexts that the stream is associated with.
- *
- * If the stream is associated with a green context, the API returns the green context in \p pGreenCtx
- * and the primary context of the associated device in \p pCtx.
- *
- * If the stream is associated with a regular context, the API returns the regular context in \p pCtx
- * and NULL in \p pGreenCtx.
- *
- * The stream handle \p hStream can refer to any of the following:
- * <ul>
- * <li>a stream created via any of the CUDA driver APIs such as ::cuStreamCreate,
- * ::cuStreamCreateWithPriority and ::cuGreenCtxStreamCreate, or their runtime API equivalents such as
- * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority.
- * Passing an invalid handle will result in undefined behavior.</li>
- * <li>any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY and
- * ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are also accepted,
- * which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread respectively.
- * If any of the special handles are specified, the API will operate on the context current to the
- * calling thread. If a green context (that was converted via ::cuCtxFromGreenCtx() before setting it current)
- * is current to the calling thread, the API will return the green context in \p pGreenCtx
- * and the primary context of the associated device in \p pCtx. If a regular context is current,
- * the API returns the regular context in \p pCtx and NULL in \p pGreenCtx.
- * Note that specifying ::CU_STREAM_PER_THREAD or ::cudaStreamPerThread will return ::CUDA_ERROR_INVALID_HANDLE
- * if a green context is current to the calling thread.
- * If no context is current to the calling thread, ::CUDA_ERROR_INVALID_CONTEXT is returned.</li>
- * </ul>
- *
- * \param hStream - Handle to the stream to be queried
- * \param pCtx - Returned regular context associated with the stream
- * \param pGreenCtx - Returned green context if the stream is associated with a green context or NULL if not
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuStreamCreateWithPriority,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetPriority,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreate,
- * ::cudaStreamCreateWithFlags
- */
- CUresult CUDAAPI cuStreamGetCtx_v2(CUstream hStream, CUcontext *pCtx, CUgreenCtx *pGreenCtx);
- /**
- * \brief Make a compute stream wait on an event
- *
- * Makes all future work submitted to \p hStream wait for all work captured in
- * \p hEvent. See ::cuEventRecord() for details on what is captured by an event.
- * The synchronization will be performed efficiently on the device when applicable.
- * \p hEvent may be from a different context or device than \p hStream.
- *
- * Valid values for \p Flags include:
- * - ::CU_EVENT_WAIT_DEFAULT: Default event wait flag.
- * - ::CU_EVENT_WAIT_EXTERNAL: Event is captured in the graph as an external
- * event node when performing stream capture. This flag is invalid outside
- * of stream capture.
- *
- * \param hStream - Stream to wait
- * \param hEvent - Event to wait on (may not be NULL)
- * \param Flags - See ::CUevent_wait_flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuEventRecord,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cuStreamDestroy,
- * ::cudaStreamWaitEvent
- */
- CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
- /**
- * \brief Add a callback to a compute stream
- *
- * \note This function is slated for eventual deprecation and removal. If
- * you do not require the callback to execute in case of a device error,
- * consider using ::cuLaunchHostFunc. Additionally, this function is not
- * supported with ::cuStreamBeginCapture and ::cuStreamEndCapture, unlike
- * ::cuLaunchHostFunc.
- *
- * Adds a callback to be called on the host after all currently enqueued
- * items in the stream have completed. For each
- * ::cuStreamAddCallback call, the callback will be executed exactly once.
- * The callback will block later work in the stream until it is finished.
- *
- * The callback may be passed ::CUDA_SUCCESS or an error code. In the event
- * of a device error, all subsequently executed callbacks will receive an
- * appropriate ::CUresult.
- *
- * Callbacks must not make any CUDA API calls. Attempting to use a CUDA API
- * will result in ::CUDA_ERROR_NOT_PERMITTED. Callbacks must not perform any
- * synchronization that may depend on outstanding device work or other callbacks
- * that are not mandated to run earlier. Callbacks without a mandated order
- * (in independent streams) execute in undefined order and may be serialized.
- *
- * For the purposes of Unified Memory, callback execution makes a number of
- * guarantees:
- * <ul>
- * <li>The callback stream is considered idle for the duration of the
- * callback. Thus, for example, a callback may always use memory attached
- * to the callback stream.</li>
- * <li>The start of execution of a callback has the same effect as
- * synchronizing an event recorded in the same stream immediately prior to
- * the callback. It thus synchronizes streams which have been "joined"
- * prior to the callback.</li>
- * <li>Adding device work to any stream does not have the effect of making
- * the stream active until all preceding host functions and stream callbacks
- * have executed. Thus, for
- * example, a callback might use global attached memory even if work has
- * been added to another stream, if the work has been ordered behind the
- * callback with an event.</li>
- * <li>Completion of a callback does not cause a stream to become
- * active except as described above. The callback stream will remain idle
- * if no device work follows the callback, and will remain idle across
- * consecutive callbacks without device work in between. Thus, for example,
- * stream synchronization can be done by signaling from a callback at the
- * end of the stream.</li>
- * </ul>
- *
- * \param hStream - Stream to add callback to
- * \param callback - The function to call once preceding stream operations are complete
- * \param userData - User specified data to be passed to the callback function
- * \param flags - Reserved for future use, must be 0
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamWaitEvent,
- * ::cuStreamDestroy,
- * ::cuMemAllocManaged,
- * ::cuStreamAttachMemAsync,
- * ::cuLaunchHostFunc,
- * ::cudaStreamAddCallback
- */
- CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void *userData, unsigned int flags);
- /**
- * \brief Begins graph capture on a stream
- *
- * Begin graph capture on \p hStream. When a stream is in capture mode, all operations
- * pushed into the stream will not be executed, but will instead be captured into
- * a graph, which will be returned via ::cuStreamEndCapture. Capture may not be initiated
- * if \p hStream is ::CU_STREAM_LEGACY. Capture must be ended on the same stream in which
- * it was initiated, and it may only be initiated if the stream is not already in capture
- * mode. The capture mode may be queried via ::cuStreamIsCapturing. A unique id
- * representing the capture sequence may be queried via ::cuStreamGetCaptureInfo.
- *
- * If \p mode is not ::CU_STREAM_CAPTURE_MODE_RELAXED, ::cuStreamEndCapture must be
- * called on this stream from the same thread.
- *
- * \param hStream - Stream in which to initiate capture
- * \param mode - Controls the interaction of this capture sequence with other API
- * calls that are potentially unsafe. For more details see
- * ::cuThreadExchangeStreamCaptureMode.
- *
- * \note Kernels captured using this API must not use texture and surface references.
- * Reading or writing through any texture or surface reference is undefined
- * behavior. This restriction does not apply to texture and surface objects.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuStreamCreate,
- * ::cuStreamIsCapturing,
- * ::cuStreamEndCapture,
- * ::cuThreadExchangeStreamCaptureMode
- */
- CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream, CUstreamCaptureMode mode);
- /**
- * \brief Begins graph capture on a stream to an existing graph
- *
- * Begin graph capture on \p hStream, placing new nodes into an existing graph. When a stream is
- * in capture mode, all operations pushed into the stream will not be executed, but will instead
- * be captured into \p hGraph. The graph will not be instantiable until the user calls
- * ::cuStreamEndCapture.
- *
- * Capture may not be initiated if \p hStream is ::CU_STREAM_LEGACY. Capture must be ended on the
- * same stream in which it was initiated, and it may only be initiated if the stream is not
- * already in capture mode. The capture mode may be queried via ::cuStreamIsCapturing. A unique id
- * representing the capture sequence may be queried via ::cuStreamGetCaptureInfo.
- *
- * If \p mode is not ::CU_STREAM_CAPTURE_MODE_RELAXED, ::cuStreamEndCapture must be
- * called on this stream from the same thread.
- *
- * \param hStream - Stream in which to initiate capture.
- * \param hGraph - Graph to capture into.
- * \param dependencies - Dependencies of the first node captured in the stream. Can be NULL if \p numDependencies is 0.
- * \param dependencyData - Optional array of data associated with each dependency.
- * \param numDependencies - Number of dependencies.
- * \param mode - Controls the interaction of this capture sequence with other API
- * calls that are potentially unsafe. For more details see
- * ::cuThreadExchangeStreamCaptureMode.
- *
- * \note Kernels captured using this API must not use texture and surface references.
- * Reading or writing through any texture or surface reference is undefined
- * behavior. This restriction does not apply to texture and surface objects.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuStreamBeginCapture,
- * ::cuStreamCreate,
- * ::cuStreamIsCapturing,
- * ::cuStreamEndCapture,
- * ::cuThreadExchangeStreamCaptureMode,
- * ::cuGraphAddNode
- */
- CUresult CUDAAPI cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode *dependencies, const CUgraphEdgeData *dependencyData, size_t numDependencies, CUstreamCaptureMode mode);
- /**
- * \brief Swaps the stream capture interaction mode for a thread
- *
- * Sets the calling thread's stream capture interaction mode to the value contained
- * in \p *mode, and overwrites \p *mode with the previous mode for the thread. To
- * facilitate deterministic behavior across function or module boundaries, callers
- * are encouraged to use this API in a push-pop fashion: \code
- CUstreamCaptureMode mode = desiredMode;
- cuThreadExchangeStreamCaptureMode(&mode);
- ...
- cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode
- * \endcode
- *
- * During stream capture (see ::cuStreamBeginCapture), some actions, such as a call
- * to ::cudaMalloc, may be unsafe. In the case of ::cudaMalloc, the operation is
- * not enqueued asynchronously to a stream, and is not observed by stream capture.
- * Therefore, if the sequence of operations captured via ::cuStreamBeginCapture
- * depended on the allocation being replayed whenever the graph is launched, the
- * captured graph would be invalid.
- *
- * Therefore, stream capture places restrictions on API calls that can be made within
- * or concurrently to a ::cuStreamBeginCapture-::cuStreamEndCapture sequence. This
- * behavior can be controlled via this API and flags to ::cuStreamBeginCapture.
- *
- * A thread's mode is one of the following:
- * - ::CU_STREAM_CAPTURE_MODE_GLOBAL: This is the default mode. If the local thread has
- * an ongoing capture sequence that was not initiated with
- * ::CU_STREAM_CAPTURE_MODE_RELAXED at ::cuStreamBeginCapture, or if any other thread
- * has a concurrent capture sequence initiated with ::CU_STREAM_CAPTURE_MODE_GLOBAL,
- * this thread is prohibited from potentially unsafe API calls.
- * - ::CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: If the local thread has an ongoing capture
- * sequence not initiated with ::CU_STREAM_CAPTURE_MODE_RELAXED, it is prohibited
- * from potentially unsafe API calls. Concurrent capture sequences in other threads
- * are ignored.
- * - ::CU_STREAM_CAPTURE_MODE_RELAXED: The local thread is not prohibited from potentially
- * unsafe API calls. Note that the thread is still prohibited from API calls which
- * necessarily conflict with stream capture, for example, attempting ::cuEventQuery
- * on an event that was last recorded inside a capture sequence.
- *
- * \param mode - Pointer to mode value to swap with the current mode
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuStreamBeginCapture
- */
- CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode);
- /**
- * \brief Ends capture on a stream, returning the captured graph
- *
- * End capture on \p hStream, returning the captured graph via \p phGraph.
- * Capture must have been initiated on \p hStream via a call to ::cuStreamBeginCapture.
- * If capture was invalidated, due to a violation of the rules of stream capture, then
- * a NULL graph will be returned.
- *
- * If the \p mode argument to ::cuStreamBeginCapture was not
- * ::CU_STREAM_CAPTURE_MODE_RELAXED, this call must be from the same thread as
- * ::cuStreamBeginCapture.
- *
- * \param hStream - Stream on which to end capture
- * \param phGraph - Returns the captured graph (NULL if capture was invalidated)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD
- * \notefnerr
- *
- * \sa
- * ::cuStreamCreate,
- * ::cuStreamBeginCapture,
- * ::cuStreamIsCapturing,
- * ::cuGraphDestroy
- */
- CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph);
- /**
- * \brief Returns a stream's capture status
- *
- * Returns the capture status of \p hStream via \p captureStatus. After a successful
- * call, \p *captureStatus will contain one of the following:
- * - ::CU_STREAM_CAPTURE_STATUS_NONE: The stream is not capturing.
- * - ::CU_STREAM_CAPTURE_STATUS_ACTIVE: The stream is capturing.
- * - ::CU_STREAM_CAPTURE_STATUS_INVALIDATED: The stream was capturing but an error
- * has invalidated the capture sequence. The capture sequence must be terminated
- * with ::cuStreamEndCapture on the stream where it was initiated in order to
- * continue using \p hStream.
- *
- * Note that, if this is called on ::CU_STREAM_LEGACY (the "null stream") while
- * a blocking stream in the same context is capturing, it will return
- * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT and \p *captureStatus is unspecified
- * after the call. The blocking stream capture is not invalidated.
- *
- * When a blocking stream is capturing, the legacy stream is in an
- * unusable state until the blocking stream capture is terminated. The legacy
- * stream is not supported for stream capture, but attempted use would have an
- * implicit dependency on the capturing stream(s).
- *
- * \param hStream - Stream to query
- * \param captureStatus - Returns the stream's capture status
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT
- * \notefnerr
- *
- * \sa
- * ::cuStreamCreate,
- * ::cuStreamBeginCapture,
- * ::cuStreamEndCapture
- */
- CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus *captureStatus);
- /**
- * \brief Query a stream's capture state
- *
- * Query stream state related to stream capture.
- *
- * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created
- * with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT.
- *
- * Valid data (other than capture status) is returned only if both of the following are true:
- * - the call returns ::CUDA_SUCCESS
- * - the returned capture status is ::CU_STREAM_CAPTURE_STATUS_ACTIVE
- *
- * \param hStream - The stream to query
- * \param captureStatus_out - Location to return the capture status of the stream; required
- * \param id_out - Optional location to return an id for the capture sequence, which is
- * unique over the lifetime of the process
- * \param graph_out - Optional location to return the graph being captured into. All
- * operations other than destroy and node removal are permitted on the graph
- * while the capture sequence is in progress. This API does not transfer
- * ownership of the graph, which is transferred or destroyed at
- * ::cuStreamEndCapture. Note that the graph handle may be invalidated before
- * end of capture for certain errors. Nodes that are or become
- * unreachable from the original stream at ::cuStreamEndCapture due to direct
- * actions on the graph do not trigger ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED.
- * \param dependencies_out - Optional location to store a pointer to an array of nodes.
- * The next node to be captured in the stream will depend on this set of nodes,
- * absent operations such as event wait which modify this set. The array pointer
- * is valid until the next API call which operates on the stream or until the
- * capture is terminated. The node handles may be copied out and are valid until
- * they or the graph is destroyed. The driver-owned array may also be passed
- * directly to APIs that operate on the graph (not the stream) without copying.
- * \param numDependencies_out - Optional location to store the size of the array
- * returned in dependencies_out.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuStreamGetCaptureInfo_v3,
- * ::cuStreamBeginCapture,
- * ::cuStreamIsCapturing,
- * ::cuStreamUpdateCaptureDependencies
- */
- CUresult CUDAAPI cuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus *captureStatus_out,
- cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
- /**
- * \brief Query a stream's capture state (12.3+)
- *
- * Query stream state related to stream capture.
- *
- * If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created
- * with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT.
- *
- * Valid data (other than capture status) is returned only if both of the following are true:
- * - the call returns ::CUDA_SUCCESS
- * - the returned capture status is ::CU_STREAM_CAPTURE_STATUS_ACTIVE
- *
- * If \p edgeData_out is non-NULL then \p dependencies_out must be as well. If
- * \p dependencies_out is non-NULL and \p edgeData_out is NULL, but there is non-zero edge
- * data for one or more of the current stream dependencies, the call will return
- * ::CUDA_ERROR_LOSSY_QUERY.
- *
- * \param hStream - The stream to query
- * \param captureStatus_out - Location to return the capture status of the stream; required
- * \param id_out - Optional location to return an id for the capture sequence, which is
- * unique over the lifetime of the process
- * \param graph_out - Optional location to return the graph being captured into. All
- * operations other than destroy and node removal are permitted on the graph
- * while the capture sequence is in progress. This API does not transfer
- * ownership of the graph, which is transferred or destroyed at
- * ::cuStreamEndCapture. Note that the graph handle may be invalidated before
- * end of capture for certain errors. Nodes that are or become
- * unreachable from the original stream at ::cuStreamEndCapture due to direct
- * actions on the graph do not trigger ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED.
- * \param dependencies_out - Optional location to store a pointer to an array of nodes.
- * The next node to be captured in the stream will depend on this set of nodes,
- * absent operations such as event wait which modify this set. The array pointer
- * is valid until the next API call which operates on the stream or until the
- * capture is terminated. The node handles may be copied out and are valid until
- * they or the graph is destroyed. The driver-owned array may also be passed
- * directly to APIs that operate on the graph (not the stream) without copying.
- * \param edgeData_out - Optional location to store a pointer to an array of graph edge
- * data. This array parallels \c dependencies_out; the next node to be added
- * has an edge to \c dependencies_out[i] with annotation \c edgeData_out[i] for
- * each \c i. The array pointer is valid until the next API call which operates
- * on the stream or until the capture is terminated.
- * \param numDependencies_out - Optional location to store the size of the array
- * returned in dependencies_out.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT,
- * ::CUDA_ERROR_LOSSY_QUERY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuStreamGetCaptureInfo,
- * ::cuStreamBeginCapture,
- * ::cuStreamIsCapturing,
- * ::cuStreamUpdateCaptureDependencies
- */
- CUresult CUDAAPI cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus *captureStatus_out,
- cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out,
- const CUgraphEdgeData **edgeData_out, size_t *numDependencies_out);
- /**
- * \brief Update the set of dependencies in a capturing stream (11.3+)
- *
- * Modifies the dependency set of a capturing stream. The dependency set is the set
- * of nodes that the next captured node in the stream will depend on.
- *
- * Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and
- * ::CU_STREAM_SET_CAPTURE_DEPENDENCIES. These control whether the set passed to
- * the API is added to the existing set or replaces it. A flags value of 0 defaults
- * to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES.
- *
- * Nodes that are removed from the dependency set via this API do not result in
- * ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at
- * ::cuStreamEndCapture.
- *
- * Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing.
- *
- * This API is new in CUDA 11.3. Developers requiring compatibility across minor
- * versions to CUDA 11.0 should either avoid this API or provide a fallback.
- *
- * \param hStream - The stream to update
- * \param dependencies - The set of dependencies to add
- * \param numDependencies - The size of the dependencies array
- * \param flags - See above
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_ILLEGAL_STATE
- *
- * \sa
- * ::cuStreamBeginCapture,
- * ::cuStreamGetCaptureInfo
- */
- CUresult CUDAAPI cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
- /**
- * \brief Update the set of dependencies in a capturing stream (12.3+)
- *
- * Modifies the dependency set of a capturing stream. The dependency set is the set
- * of nodes that the next captured node in the stream will depend on along with the
- * edge data for those dependencies.
- *
- * Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and
- * ::CU_STREAM_SET_CAPTURE_DEPENDENCIES. These control whether the set passed to
- * the API is added to the existing set or replaces it. A flags value of 0 defaults
- * to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES.
- *
- * Nodes that are removed from the dependency set via this API do not result in
- * ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at
- * ::cuStreamEndCapture.
- *
- * Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing.
- *
- * \param hStream - The stream to update
- * \param dependencies - The set of dependencies to add
- * \param dependencyData - Optional array of data associated with each dependency.
- * \param numDependencies - The size of the dependencies array
- * \param flags - See above
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_ILLEGAL_STATE
- *
- * \sa
- * ::cuStreamBeginCapture,
- * ::cuStreamGetCaptureInfo
- */
- CUresult CUDAAPI cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode *dependencies,
- const CUgraphEdgeData *dependencyData, size_t numDependencies, unsigned int flags);
- /**
- * \brief Attach memory to a stream asynchronously
- *
- * Enqueues an operation in \p hStream to specify stream association of
- * \p length bytes of memory starting from \p dptr. This function is a
- * stream-ordered operation, meaning that it is dependent on, and will
- * only take effect when, previous work in stream has completed. Any
- * previous association is automatically replaced.
- *
- * \p dptr must point to one of the following types of memories:
- * - managed memory declared using the __managed__ keyword or allocated with
- * ::cuMemAllocManaged.
- * - a valid host-accessible region of system-allocated pageable memory. This
- * type of memory may only be specified if the device associated with the
- * stream reports a non-zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
- *
- * For managed allocations, \p length must be either zero or the entire
- * allocation's size. Both indicate that the entire allocation's stream
- * association is being changed. Currently, it is not possible to change stream
- * association for a portion of a managed allocation.
- *
- * For pageable host allocations, \p length must be non-zero.
- *
- * The stream association is specified using \p flags which must be
- * one of ::CUmemAttach_flags.
- * If the ::CU_MEM_ATTACH_GLOBAL flag is specified, the memory can be accessed
- * by any stream on any device.
- * If the ::CU_MEM_ATTACH_HOST flag is specified, the program makes a guarantee
- * that it won't access the memory on the device from any stream on a device that
- * has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS.
- * If the ::CU_MEM_ATTACH_SINGLE flag is specified and \p hStream is associated with
- * a device that has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
- * the program makes a guarantee that it will only access the memory on the device
- * from \p hStream. It is illegal to attach singly to the NULL stream, because the
- * NULL stream is a virtual global stream and not a specific stream. An error will
- * be returned in this case.
- *
- * When memory is associated with a single stream, the Unified Memory system will
- * allow CPU access to this memory region so long as all operations in \p hStream
- * have completed, regardless of whether other streams are active. In effect,
- * this constrains exclusive ownership of the managed memory region by
- * an active GPU to per-stream activity instead of whole-GPU activity.
- *
- * Accessing memory on the device from streams that are not associated with
- * it will produce undefined results. No error checking is performed by the
- * Unified Memory system to ensure that kernels launched into other streams
- * do not access this region.
- *
- * It is a program's responsibility to order calls to ::cuStreamAttachMemAsync
- * via events, synchronization or other means to ensure legal access to memory
- * at all times. Data visibility and coherency will be changed appropriately
- * for all kernels which follow a stream-association change.
- *
- * If \p hStream is destroyed while data is associated with it, the association is
- * removed and the association reverts to the default visibility of the allocation
- * as specified at ::cuMemAllocManaged. For __managed__ variables, the default
- * association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an
- * asynchronous operation, and as a result, the change to default association won't
- * happen until all work in the stream has completed.
- *
- * \param hStream - Stream in which to enqueue the attach operation
- * \param dptr - Pointer to memory (must be a pointer to managed memory or
- * to a valid host-accessible region of system-allocated
- * pageable memory)
- * \param length - Length of memory
- * \param flags - Must be one of ::CUmemAttach_flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamWaitEvent,
- * ::cuStreamDestroy,
- * ::cuMemAllocManaged,
- * ::cudaStreamAttachMemAsync
- */
- CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
- /**
- * \brief Determine status of a compute stream
- *
- * Returns ::CUDA_SUCCESS if all operations in the stream specified by
- * \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not.
- *
- * For the purposes of Unified Memory, a return value of ::CUDA_SUCCESS
- * is equivalent to having called ::cuStreamSynchronize().
- *
- * \param hStream - Stream to query status of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_READY
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamWaitEvent,
- * ::cuStreamDestroy,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamQuery
- */
- CUresult CUDAAPI cuStreamQuery(CUstream hStream);
- /**
- * \brief Wait until a stream's tasks are completed
- *
- * Waits until the device has completed all operations in the stream specified
- * by \p hStream. If the context was created with the
- * ::CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block until the
- * stream is finished with all of its tasks.
- *
- * \param hStream - Stream to wait for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamDestroy,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamAddCallback,
- * ::cudaStreamSynchronize
- */
- CUresult CUDAAPI cuStreamSynchronize(CUstream hStream);
- /**
- * \brief Destroys a stream
- *
- * Destroys the stream specified by \p hStream.
- *
- * In case the device is still doing work in the stream \p hStream
- * when ::cuStreamDestroy() is called, the function will return immediately
- * and the resources associated with \p hStream will be released automatically
- * once the device has completed all work in \p hStream.
- *
- * \param hStream - Stream to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamDestroy
- */
- CUresult CUDAAPI cuStreamDestroy(CUstream hStream);
- /**
- * \brief Copies attributes from source stream to destination stream.
- *
- * Copies attributes from source stream \p src to destination stream \p dst.
- * Both streams must have the same context.
- *
- * \param[out] dst Destination stream
- * \param[in] src Source stream
- * For list of attributes see ::CUstreamAttrID
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuStreamCopyAttributes(CUstream dst, CUstream src);
- /**
- * \brief Queries stream attribute.
- *
- * Queries attribute \p attr from \p hStream and stores it in corresponding
- * member of \p value_out.
- *
- * \param[in] hStream
- * \param[in] attr
- * \param[out] value_out
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr,
- CUstreamAttrValue *value_out);
- /**
- * \brief Sets stream attribute.
- *
- * Sets attribute \p attr on \p hStream from corresponding attribute of
- * \p value. The updated attribute will be applied to subsequent work
- * submitted to the stream. It will not affect previously submitted work.
- *
- * \param[out] hStream
- * \param[in] attr
- * \param[in] value
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr,
- const CUstreamAttrValue *value);
- /** @} */ /* END CUDA_STREAM */
- /**
- * \defgroup CUDA_EVENT Event Management
- *
- * ___MANBRIEF___ event management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the event management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Creates an event
- *
- * Creates an event *phEvent for the current context with the flags specified via
- * \p Flags. Valid flags include:
- * - ::CU_EVENT_DEFAULT: Default event creation flag.
- * - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking
- * synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on
- * an event created with this flag will block until the event has actually
- * been recorded.
- * - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not need
- * to record timing data. Events created with this flag specified and
- * the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best
- * performance when used with ::cuStreamWaitEvent() and ::cuEventQuery().
- * - ::CU_EVENT_INTERPROCESS: Specifies that the created event may be used as an
- * interprocess event by ::cuIpcGetEventHandle(). ::CU_EVENT_INTERPROCESS must
- * be specified along with ::CU_EVENT_DISABLE_TIMING.
- *
- * \param phEvent - Returns newly created event
- * \param Flags - Event creation flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \sa
- * ::cuEventRecord,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuEventDestroy,
- * ::cuEventElapsedTime,
- * ::cudaEventCreate,
- * ::cudaEventCreateWithFlags
- */
- CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags);
- /**
- * \brief Records an event
- *
- * Captures in \p hEvent the contents of \p hStream at the time of this call.
- * \p hEvent and \p hStream must be from the same context otherwise
- * ::CUDA_ERROR_INVALID_HANDLE is returned.
- * Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then
- * examine or wait for completion of the work that was captured. Uses of
- * \p hStream after this call do not modify \p hEvent. See note on default
- * stream behavior for what is captured in the default case.
- *
- * ::cuEventRecord() can be called multiple times on the same event and
- * will overwrite the previously captured state. Other APIs such as
- * ::cuStreamWaitEvent() use the most recently captured state at the time
- * of the API call, and are not affected by later calls to
- * ::cuEventRecord(). Before the first call to ::cuEventRecord(), an
- * event represents an empty set of work, so for example ::cuEventQuery()
- * would return ::CUDA_SUCCESS.
- *
- * \param hEvent - Event to record
- * \param hStream - Stream to record event for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuStreamWaitEvent,
- * ::cuEventDestroy,
- * ::cuEventElapsedTime,
- * ::cudaEventRecord,
- * ::cuEventRecordWithFlags
- */
- CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream);
- /**
- * \brief Records an event
- *
- * Captures in \p hEvent the contents of \p hStream at the time of this call.
- * \p hEvent and \p hStream must be from the same context otherwise
- * ::CUDA_ERROR_INVALID_HANDLE is returned.
- * Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then
- * examine or wait for completion of the work that was captured. Uses of
- * \p hStream after this call do not modify \p hEvent. See note on default
- * stream behavior for what is captured in the default case.
- *
- * ::cuEventRecordWithFlags() can be called multiple times on the same event and
- * will overwrite the previously captured state. Other APIs such as
- * ::cuStreamWaitEvent() use the most recently captured state at the time
- * of the API call, and are not affected by later calls to
- * ::cuEventRecordWithFlags(). Before the first call to ::cuEventRecordWithFlags(), an
- * event represents an empty set of work, so for example ::cuEventQuery()
- * would return ::CUDA_SUCCESS.
- *
- * flags include:
- * - ::CU_EVENT_RECORD_DEFAULT: Default event record flag.
- * - ::CU_EVENT_RECORD_EXTERNAL: Event is captured in the graph as an external
- * event node when performing stream capture. This flag is invalid outside
- * of stream capture.
- *
- * \param hEvent - Event to record
- * \param hStream - Stream to record event for
- * \param flags - See ::CUevent_record_flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuStreamWaitEvent,
- * ::cuEventDestroy,
- * ::cuEventElapsedTime,
- * ::cuEventRecord,
- * ::cudaEventRecord
- */
- CUresult CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
- /**
- * \brief Queries an event's status
- *
- * Queries the status of all work currently captured by \p hEvent. See
- * ::cuEventRecord() for details on what is captured by an event.
- *
- * Returns ::CUDA_SUCCESS if all captured work has been completed, or
- * ::CUDA_ERROR_NOT_READY if any captured work is incomplete.
- *
- * For the purposes of Unified Memory, a return value of ::CUDA_SUCCESS
- * is equivalent to having called ::cuEventSynchronize().
- *
- * \param hEvent - Event to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_READY
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventRecord,
- * ::cuEventSynchronize,
- * ::cuEventDestroy,
- * ::cuEventElapsedTime,
- * ::cudaEventQuery
- */
- CUresult CUDAAPI cuEventQuery(CUevent hEvent);
- /**
- * \brief Waits for an event to complete
- *
- * Waits until the completion of all work currently captured in \p hEvent.
- * See ::cuEventRecord() for details on what is captured by an event.
- *
- * Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC
- * flag will cause the calling CPU thread to block until the event has
- * been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has
- * not been set, then the CPU thread will busy-wait until the event has
- * been completed by the device.
- *
- * \param hEvent - Event to wait for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventRecord,
- * ::cuEventQuery,
- * ::cuEventDestroy,
- * ::cuEventElapsedTime,
- * ::cudaEventSynchronize
- */
- CUresult CUDAAPI cuEventSynchronize(CUevent hEvent);
- /**
- * \brief Destroys an event
- *
- * Destroys the event specified by \p hEvent.
- *
- * An event may be destroyed before it is complete (i.e., while
- * ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY). In this case, the
- * call does not block on completion of the event, and any associated
- * resources will automatically be released asynchronously at completion.
- *
- * \param hEvent - Event to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventRecord,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuEventElapsedTime,
- * ::cudaEventDestroy
- */
- CUresult CUDAAPI cuEventDestroy(CUevent hEvent);
- /**
- * \brief Computes the elapsed time between two events
- *
- * Computes the elapsed time between two events (in milliseconds with a
- * resolution of around 0.5 microseconds).
- *
- * If either event was last recorded in a non-NULL stream, the resulting time
- * may be greater than expected (even if both used the same stream handle). This
- * happens because the ::cuEventRecord() operation takes place asynchronously
- * and there is no guarantee that the measured latency is actually just between
- * the two events. Any number of other different stream operations could execute
- * in between the two measured events, thus altering the timing in a significant
- * way.
- *
- * If ::cuEventRecord() has not been called on either event then
- * ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called
- * on both events but one or both of them has not yet been completed (that is,
- * ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the
- * events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with
- * the ::CU_EVENT_DISABLE_TIMING flag, then this function will return
- * ::CUDA_ERROR_INVALID_HANDLE.
- *
- * Note there is a later version of this API, ::cuEventElapsedTime_v2, which will
- * supplant this version in CUDA 13.0. This version is retained for minor version compatibility.
- *
- * \param pMilliseconds - Time between \p hStart and \p hEnd in ms
- * \param hStart - Starting event
- * \param hEnd - Ending event
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_READY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventRecord,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuEventDestroy,
- * ::cudaEventElapsedTime
- */
- CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
- /**
- * \brief Computes the elapsed time between two events
- *
- * Computes the elapsed time between two events (in milliseconds with a
- * resolution of around 0.5 microseconds). Note this API is not guaranteed
- * to return the latest errors for pending work. As such this API is intended to
- * serve as an elapsed time calculation only and any polling for completion on the
- * events to be compared should be done with ::cuEventQuery instead.
- *
- * If either event was last recorded in a non-NULL stream, the resulting time
- * may be greater than expected (even if both used the same stream handle). This
- * happens because the ::cuEventRecord() operation takes place asynchronously
- * and there is no guarantee that the measured latency is actually just between
- * the two events. Any number of other different stream operations could execute
- * in between the two measured events, thus altering the timing in a significant
- * way.
- *
- * If ::cuEventRecord() has not been called on either event then
- * ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called
- * on both events but one or both of them has not yet been completed (that is,
- * ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the
- * events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with
- * the ::CU_EVENT_DISABLE_TIMING flag, then this function will return
- * ::CUDA_ERROR_INVALID_HANDLE.
- *
- * \param pMilliseconds - Time between \p hStart and \p hEnd in ms
- * \param hStart - Starting event
- * \param hEnd - Ending event
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_READY,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa ::cuEventCreate,
- * ::cuEventRecord,
- * ::cuEventQuery,
- * ::cuEventSynchronize,
- * ::cuEventDestroy,
- * ::cudaEventElapsedTime
- */
- CUresult CUDAAPI cuEventElapsedTime_v2(float *pMilliseconds, CUevent hStart, CUevent hEnd);
- /** @} */ /* END CUDA_EVENT */
- /**
- * \defgroup CUDA_EXTRES_INTEROP External Resource Interoperability
- *
- * ___MANBRIEF___ External resource interoperability functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the external resource interoperability functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Imports an external memory object
- *
- * Imports an externally allocated memory object and returns
- * a handle to that in \p extMem_out.
- *
- * The properties of the handle being imported must be described in
- * \p memHandleDesc. The ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC structure
- * is defined as follows:
- *
- * \code
- typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
- CUexternalMemoryHandleType type;
- union {
- int fd;
- struct {
- void *handle;
- const void *name;
- } win32;
- const void *nvSciBufObject;
- } handle;
- unsigned long long size;
- unsigned int flags;
- } CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
- * \endcode
- *
- * where ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type specifies the type
- * of handle being imported. ::CUexternalMemoryHandleType is
- * defined as:
- *
- * \code
- typedef enum CUexternalMemoryHandleType_enum {
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8,
- } CUexternalMemoryHandleType;
- * \endcode
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, then
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::fd must be a valid
- * file descriptor referencing a memory object. Ownership of
- * the file descriptor is transferred to the CUDA driver when the
- * handle is imported successfully. Performing any operations on the
- * file descriptor after it is imported results in undefined behavior.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32, then exactly one
- * of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be
- * NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * references a memory object. Ownership of this handle is
- * not transferred to CUDA after the import operation, so the
- * application must release the handle using the appropriate system
- * call. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must point to a NULL-terminated array of
- * UTF-16 characters that refers to a memory object.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, then
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must
- * be non-NULL and
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * must be NULL. The handle specified must be a globally shared KMT
- * handle. This handle does not hold a reference to the underlying
- * object, and thus will be invalid when all references to the
- * memory object are destroyed.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP, then exactly one
- * of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be
- * NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * is returned by ID3D12Device::CreateSharedHandle when referring to a
- * ID3D12Heap object. This handle holds a reference to the underlying
- * object. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must point to a NULL-terminated array of
- * UTF-16 characters that refers to a ID3D12Heap object.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE, then exactly one
- * of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be
- * NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * is returned by ID3D12Device::CreateSharedHandle when referring to a
- * ID3D12Resource object. This handle holds a reference to the
- * underlying object. If
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must point to a NULL-terminated array of
- * UTF-16 characters that refers to a ID3D12Resource object.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE, then
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must
- * represent a valid shared NT handle that is returned by
- * IDXGIResource1::CreateSharedHandle when referring to a
- * ID3D11Resource object. If
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must point to a NULL-terminated array of
- * UTF-16 characters that refers to a ID3D11Resource object.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT, then
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must
- * represent a valid shared KMT handle that is returned by
- * IDXGIResource::GetSharedHandle when referring to a
- * ID3D11Resource object and
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name
- * must be NULL.
- *
- * If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, then
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::nvSciBufObject must be non-NULL
- * and reference a valid NvSciBuf object.
- * If the NvSciBuf object imported into CUDA is also mapped by other drivers, then the
- * application must use ::cuWaitExternalSemaphoresAsync or ::cuSignalExternalSemaphoresAsync
- * as appropriate barriers to maintain coherence between CUDA and the other drivers.
- * See ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC and ::CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC
- * for memory synchronization.
- *
- *
- * The size of the memory object must be specified in
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::size.
- *
- * Specifying the flag ::CUDA_EXTERNAL_MEMORY_DEDICATED in
- * ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::flags indicates that the
- * resource is a dedicated resource. The definition of what constitutes a
- * dedicated resource is outside the scope of this extension.
- * This flag must be set if ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type
- * is one of the following:
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
- *
- * \param extMem_out - Returned handle to an external memory object
- * \param memHandleDesc - Memory import handle descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OPERATING_SYSTEM
- * \notefnerr
- *
- * \note If the Vulkan memory imported into CUDA is mapped on the CPU then the
- * application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges
- * as well as appropriate Vulkan pipeline barriers to maintain coherence between
- * CPU and GPU. For more information on these APIs, please refer to "Synchronization
- * and Cache Control" chapter from Vulkan specification.
- *
- * \sa ::cuDestroyExternalMemory,
- * ::cuExternalMemoryGetMappedBuffer,
- * ::cuExternalMemoryGetMappedMipmappedArray
- */
- CUresult CUDAAPI cuImportExternalMemory(CUexternalMemory *extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc);
- /**
- * \brief Maps a buffer onto an imported memory object
- *
- * Maps a buffer onto an imported memory object and returns a device
- * pointer in \p devPtr.
- *
- * The properties of the buffer being mapped must be described in
- * \p bufferDesc. The ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC structure is
- * defined as follows:
- *
- * \code
- typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
- unsigned long long offset;
- unsigned long long size;
- unsigned int flags;
- } CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
- * \endcode
- *
- * where ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::offset is the offset in
- * the memory object where the buffer's base address is.
- * ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::size is the size of the buffer.
- * ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::flags must be zero.
- *
- * The offset and size have to be suitably aligned to match the
- * requirements of the external API. Mapping two buffers whose ranges
- * overlap may or may not result in the same virtual address being
- * returned for the overlapped portion. In such cases, the application
- * must ensure that all accesses to that region from the GPU are
- * volatile. Otherwise writes made via one address are not guaranteed
- * to be visible via the other address, even if they're issued by the
- * same thread. It is recommended that applications map the combined
- * range instead of mapping separate buffers and then apply the
- * appropriate offsets to the returned pointer to derive the
- * individual buffers.
- *
- * The returned pointer \p devPtr must be freed using ::cuMemFree.
- *
- * \param devPtr - Returned device pointer to buffer
- * \param extMem - Handle to external memory object
- * \param bufferDesc - Buffer descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuImportExternalMemory,
- * ::cuDestroyExternalMemory,
- * ::cuExternalMemoryGetMappedMipmappedArray
- */
- CUresult CUDAAPI cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc);
- /**
- * \brief Maps a CUDA mipmapped array onto an external memory object
- *
- * Maps a CUDA mipmapped array onto an external object and returns a
- * handle to it in \p mipmap.
- *
- * The properties of the CUDA mipmapped array being mapped must be
- * described in \p mipmapDesc. The structure
- * ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC is defined as follows:
- *
- * \code
- typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
- unsigned long long offset;
- CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
- unsigned int numLevels;
- } CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
- * \endcode
- *
- * where ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::offset is the
- * offset in the memory object where the base level of the mipmap
- * chain is.
- * ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::arrayDesc describes
- * the format, dimensions and type of the base level of the mipmap
- * chain. For further details on these parameters, please refer to the
- * documentation for ::cuMipmappedArrayCreate. Note that if the mipmapped
- * array is bound as a color target in the graphics API, then the flag
- * ::CUDA_ARRAY3D_COLOR_ATTACHMENT must be specified in
- * ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::arrayDesc::Flags.
- * ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::numLevels specifies
- * the total number of levels in the mipmap chain.
- *
- * If \p extMem was imported from a handle of type ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, then
- * ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::numLevels must be equal to 1.
- *
- *
- * The returned CUDA mipmapped array must be freed using ::cuMipmappedArrayDestroy.
- *
- * \param mipmap - Returned CUDA mipmapped array
- * \param extMem - Handle to external memory object
- * \param mipmapDesc - CUDA array descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuImportExternalMemory,
- * ::cuDestroyExternalMemory,
- * ::cuExternalMemoryGetMappedBuffer
- */
- CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray *mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc);
- /**
- * \brief Destroys an external memory object.
- *
- * Destroys the specified external memory object. Any existing buffers
- * and CUDA mipmapped arrays mapped onto this object must no longer be
- * used and must be explicitly freed using ::cuMemFree and
- * ::cuMipmappedArrayDestroy respectively.
- *
- * \param extMem - External memory object to be destroyed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuImportExternalMemory,
- * ::cuExternalMemoryGetMappedBuffer,
- * ::cuExternalMemoryGetMappedMipmappedArray
- */
- CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem);
- /**
- * \brief Imports an external semaphore
- *
- * Imports an externally allocated synchronization object and returns
- * a handle to that in \p extSem_out.
- *
- * The properties of the handle being imported must be described in
- * \p semHandleDesc. The ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC structure is
- * defined as follows:
- *
- * \code
- typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
- CUexternalSemaphoreHandleType type;
- union {
- int fd;
- struct {
- void *handle;
- const void *name;
- } win32;
- const void* nvSciSyncObj;
- } handle;
- unsigned int flags;
- } CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;
- * \endcode
- *
- * where ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type specifies the type of
- * handle being imported. ::CUexternalSemaphoreHandleType is defined
- * as:
- *
- * \code
- typedef enum CUexternalSemaphoreHandleType_enum {
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10
- } CUexternalSemaphoreHandleType;
- * \endcode
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::fd must be a valid
- * file descriptor referencing a synchronization object. Ownership of
- * the file descriptor is transferred to the CUDA driver when the
- * handle is imported successfully. Performing any operations on the
- * file descriptor after it is imported results in undefined behavior.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32, then exactly one
- * of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be
- * NULL. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * references a synchronization object. Ownership of this handle is
- * not transferred to CUDA after the import operation, so the
- * application must release the handle using the appropriate system
- * call. If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must name a valid synchronization object.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle must
- * be non-NULL and
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * must be NULL. The handle specified must be a globally shared KMT
- * handle. This handle does not hold a reference to the underlying
- * object, and thus will be invalid when all references to the
- * synchronization object are destroyed.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE, then exactly one
- * of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be
- * NULL. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * is returned by ID3D12Device::CreateSharedHandle when referring to a
- * ID3D12Fence object. This handle holds a reference to the underlying
- * object. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must name a valid synchronization object that
- * refers to a valid ID3D12Fence object.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * represents a valid shared NT handle that is returned by
- * ID3D11Fence::CreateSharedHandle. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must name a valid synchronization object that
- * refers to a valid ID3D11Fence object.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::nvSciSyncObj
- * represents a valid NvSciSyncObj.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * represents a valid shared NT handle that
- * is returned by IDXGIResource1::CreateSharedHandle when referring to
- * a IDXGIKeyedMutex object. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must name a valid synchronization object that
- * refers to a valid IDXGIKeyedMutex object.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * represents a valid shared KMT handle that
- * is returned by IDXGIResource::GetSharedHandle when referring to
- * a IDXGIKeyedMutex object and
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must be NULL.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD, then
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::fd must be a valid
- * file descriptor referencing a synchronization object. Ownership of
- * the file descriptor is transferred to the CUDA driver when the
- * handle is imported successfully. Performing any operations on the
- * file descriptor after it is imported results in undefined behavior.
- *
- * If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32, then exactly one
- * of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be
- * NULL. If
- * ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle
- * is not NULL, then it must represent a valid shared NT handle that
- * references a synchronization object. Ownership of this handle is
- * not transferred to CUDA after the import operation, so the
- * application must release the handle using the appropriate system
- * call. If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name
- * is not NULL, then it must name a valid synchronization object.
- *
- * \param extSem_out - Returned handle to an external semaphore
- * \param semHandleDesc - Semaphore import handle descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OPERATING_SYSTEM
- * \notefnerr
- *
- * \sa ::cuDestroyExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuImportExternalSemaphore(CUexternalSemaphore *extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc);
- /**
- * \brief Signals a set of external semaphore objects
- *
- * Enqueues a signal operation on a set of externally allocated
- * semaphore objects in the specified stream. The operations will be
- * executed when all prior operations in the stream complete.
- *
- * The exact semantics of signaling a semaphore depends on the type of
- * the object.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * then signaling the semaphore will set it to the signaled state.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
- * then the semaphore will be set to the value specified in
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::fence::value.
- *
- * If the semaphore object is of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC
- * this API sets ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence
- * to a value that can be used by subsequent waiters of the same NvSciSync object
- * to order operations with those currently submitted in \p stream. Such an update
- * will overwrite previous contents of
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence. By default,
- * signaling such an external semaphore object causes appropriate memory synchronization
- * operations to be performed over all external memory objects that are imported as
- * ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF. This ensures that any subsequent accesses
- * made by other importers of the same set of NvSciBuf memory object(s) are coherent.
- * These operations can be skipped by specifying the flag
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC, which can be used as a
- * performance optimization when data coherency is not required. But specifying this
- * flag in scenarios where data coherency is required results in undefined behavior.
- * Also, for semaphore object of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC,
- * if the NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags in
- * ::cuDeviceGetNvSciSyncAttributes to CUDA_NVSCISYNC_ATTR_SIGNAL, this API will return
- * CUDA_ERROR_NOT_SUPPORTED.
- * NvSciSyncFence associated with semaphore object of the type
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC can be deterministic. For this the
- * NvSciSyncAttrList used to create the semaphore object must have value of
- * NvSciSyncAttrKey_RequireDeterministicFences key set to true. Deterministic fences
- * allow users to enqueue a wait over the semaphore object even before corresponding
- * signal is enqueued. For such a semaphore object, CUDA guarantees that each signal
- * operation will increment the fence value by '1'. Users are expected to track count
- * of signals enqueued on the semaphore object and insert waits accordingly. When such
- * a semaphore object is signaled from multiple streams, due to concurrent stream
- * execution, it is possible that the order in which the semaphore gets signaled is
- * nondeterministic. This could lead to waiters of the semaphore getting unblocked
- * incorrectly. Users are expected to handle such situations, either by not using the
- * same semaphore object with deterministic fence support enabled in different streams
- * or by adding explicit dependency amongst such streams so that the semaphore is
- * signaled in order.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
- * then the keyed mutex will be released with the key specified in
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::keyedmutex::key.
- *
- * \param extSemArray - Set of external semaphores to be signaled
- * \param paramsArray - Array of semaphore parameters
- * \param numExtSems - Number of semaphores to signal
- * \param stream - Stream to enqueue the signal operations in
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuImportExternalSemaphore,
- * ::cuDestroyExternalSemaphore,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
- /**
- * \brief Waits on a set of external semaphore objects
- *
- * Enqueues a wait operation on a set of externally allocated
- * semaphore objects in the specified stream. The operations will be
- * executed when all prior operations in the stream complete.
- *
- * The exact semantics of waiting on a semaphore depends on the type
- * of the object.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
- * then waiting on the semaphore will wait until the semaphore reaches
- * the signaled state. The semaphore will then be reset to the
- * unsignaled state. Therefore for every signal operation, there can
- * only be one wait operation.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
- * then waiting on the semaphore will wait until the value of the
- * semaphore is greater than or equal to
- * ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::fence::value.
- *
- * If the semaphore object is of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC
- * then, waiting on the semaphore will wait until the
- * ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence is signaled by the
- * signaler of the NvSciSyncObj that was associated with this semaphore object.
- * By default, waiting on such an external semaphore object causes appropriate
- * memory synchronization operations to be performed over all external memory objects
- * that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF. This ensures that
- * any subsequent accesses made by other importers of the same set of NvSciBuf memory
- * object(s) are coherent. These operations can be skipped by specifying the flag
- * ::CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC, which can be used as a
- * performance optimization when data coherency is not required. But specifying this
- * flag in scenarios where data coherency is required results in undefined behavior.
- * Also, for semaphore object of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC,
- * if the NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags in
- * ::cuDeviceGetNvSciSyncAttributes to CUDA_NVSCISYNC_ATTR_WAIT, this API will return
- * CUDA_ERROR_NOT_SUPPORTED.
- *
- * If the semaphore object is any one of the following types:
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX,
- * ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
- * then the keyed mutex will be acquired when it is released with the key
- * specified in ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::keyedmutex::key
- * or until the timeout specified by
- * ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::keyedmutex::timeoutMs
- * has lapsed. The timeout interval can either be a finite value
- * specified in milliseconds or an infinite value. In case an infinite
- * value is specified the timeout never elapses. The Windows INFINITE
- * macro must be used to specify infinite timeout.
- *
- * \param extSemArray - External semaphores to be waited on
- * \param paramsArray - Array of semaphore parameters
- * \param numExtSems - Number of semaphores to wait on
- * \param stream - Stream to enqueue the wait operations in
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_TIMEOUT
- * \notefnerr
- *
- * \sa ::cuImportExternalSemaphore,
- * ::cuDestroyExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
- /**
- * \brief Destroys an external semaphore
- *
- * Destroys an external semaphore object and releases any references
- * to the underlying resource. Any outstanding signals or waits must
- * have completed before the semaphore is destroyed.
- *
- * \param extSem - External semaphore to be destroyed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuImportExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem);
- /** @} */ /* END CUDA_EXTRES_INTEROP */
- /**
- * \defgroup CUDA_MEMOP Stream Memory Operations
- *
- * ___MANBRIEF___ Stream memory operations of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the stream memory operations of the low-level CUDA
- * driver application programming interface.
- *
- * Support for the ::CU_STREAM_WAIT_VALUE_NOR flag can be queried with
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2.
- *
- * Support for the ::cuStreamWriteValue64() and ::cuStreamWaitValue64()
- * functions, as well as for the ::CU_STREAM_MEM_OP_WAIT_VALUE_64 and
- * ::CU_STREAM_MEM_OP_WRITE_VALUE_64 flags, can be queried with
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS.
- *
- * Support for both ::CU_STREAM_WAIT_VALUE_FLUSH and
- * ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES requires dedicated platform
- * hardware features and can be queried with ::cuDeviceGetAttribute() and
- * ::CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES.
- *
- * Note that all memory pointers passed as parameters to these operations
- * are device pointers. Where necessary a device pointer should be
- * obtained, for example with ::cuMemHostGetDevicePointer().
- *
- * None of the operations accepts pointers to managed memory buffers
- * (::cuMemAllocManaged).
- *
- * \note
- * Warning:
- * Improper use of these APIs may deadlock the application. Synchronization
- * ordering established through these APIs is not visible to CUDA. CUDA tasks
- * that are (even indirectly) ordered by these APIs should also have that order
- * expressed with CUDA-visible dependencies such as events. This ensures that
- * the scheduler does not serialize them in an improper order.
- *
- * @{
- */
- /**
- * \brief Wait on a memory location
- *
- * Enqueues a synchronization of the stream on the given memory location. Work
- * ordered after the operation will block until the given condition on the
- * memory is satisfied. By default, the condition is to wait for
- * (int32_t)(*addr - value) >= 0, a cyclic greater-or-equal.
- * Other condition types can be specified via \p flags.
- *
- * If the memory was registered via ::cuMemHostRegister(), the device pointer
- * should be obtained with ::cuMemHostGetDevicePointer(). This function cannot
- * be used with managed memory (::cuMemAllocManaged).
- *
- * Support for ::CU_STREAM_WAIT_VALUE_NOR can be queried with ::cuDeviceGetAttribute() and
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2.
- *
- * \note
- * Warning:
- * Improper use of this API may deadlock the application. Synchronization
- * ordering established through this API is not visible to CUDA. CUDA tasks
- * that are (even indirectly) ordered by this API should also have that order
- * expressed with CUDA-visible dependencies such as events. This ensures that
- * the scheduler does not serialize them in an improper order.
- *
- * \param stream The stream to synchronize on the memory location.
- * \param addr The memory location to wait on.
- * \param value The value to compare with the memory location.
- * \param flags See ::CUstreamWaitValue_flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamWaitValue64,
- * ::cuStreamWriteValue32,
- * ::cuStreamWriteValue64,
- * ::cuStreamBatchMemOp,
- * ::cuMemHostRegister,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- /**
- * \brief Wait on a memory location
- *
- * Enqueues a synchronization of the stream on the given memory location. Work
- * ordered after the operation will block until the given condition on the
- * memory is satisfied. By default, the condition is to wait for
- * (int64_t)(*addr - value) >= 0, a cyclic greater-or-equal.
- * Other condition types can be specified via \p flags.
- *
- * If the memory was registered via ::cuMemHostRegister(), the device pointer
- * should be obtained with ::cuMemHostGetDevicePointer().
- *
- * Support for this can be queried with ::cuDeviceGetAttribute() and
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS.
- *
- * \note
- * Warning:
- * Improper use of this API may deadlock the application. Synchronization
- * ordering established through this API is not visible to CUDA. CUDA tasks
- * that are (even indirectly) ordered by this API should also have that order
- * expressed with CUDA-visible dependencies such as events. This ensures that
- * the scheduler does not serialize them in an improper order.
- *
- * \param stream The stream to synchronize on the memory location.
- * \param addr The memory location to wait on.
- * \param value The value to compare with the memory location.
- * \param flags See ::CUstreamWaitValue_flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamWaitValue32,
- * ::cuStreamWriteValue32,
- * ::cuStreamWriteValue64,
- * ::cuStreamBatchMemOp,
- * ::cuMemHostRegister,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- /**
- * \brief Write a value to memory
- *
- * Write a value to memory.
- *
- * If the memory was registered via ::cuMemHostRegister(), the device pointer
- * should be obtained with ::cuMemHostGetDevicePointer(). This function cannot
- * be used with managed memory (::cuMemAllocManaged).
- *
- * \param stream The stream to do the write in.
- * \param addr The device address to write to.
- * \param value The value to write.
- * \param flags See ::CUstreamWriteValue_flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamWriteValue64,
- * ::cuStreamWaitValue32,
- * ::cuStreamWaitValue64,
- * ::cuStreamBatchMemOp,
- * ::cuMemHostRegister,
- * ::cuEventRecord
- */
- CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- /**
- * \brief Write a value to memory
- *
- * Write a value to memory.
- *
- * If the memory was registered via ::cuMemHostRegister(), the device pointer
- * should be obtained with ::cuMemHostGetDevicePointer().
- *
- * Support for this can be queried with ::cuDeviceGetAttribute() and
- * ::CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS.
- *
- * \param stream The stream to do the write in.
- * \param addr The device address to write to.
- * \param value The value to write.
- * \param flags See ::CUstreamWriteValue_flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamWriteValue32,
- * ::cuStreamWaitValue32,
- * ::cuStreamWaitValue64,
- * ::cuStreamBatchMemOp,
- * ::cuMemHostRegister,
- * ::cuEventRecord
- */
- CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- /**
- * \brief Batch operations to synchronize the stream via memory operations
- *
- * This is a batch version of ::cuStreamWaitValue32() and ::cuStreamWriteValue32().
- * Batching operations may avoid some performance overhead in both the API call
- * and the device execution versus adding them to the stream in separate API
- * calls. The operations are enqueued in the order they appear in the array.
- *
- * See ::CUstreamBatchMemOpType for the full set of supported operations, and
- * ::cuStreamWaitValue32(), ::cuStreamWaitValue64(), ::cuStreamWriteValue32(),
- * and ::cuStreamWriteValue64() for details of specific operations.
- *
- * See related APIs for details on querying support for specific operations.
- *
- * \note
- * Warning:
- * Improper use of this API may deadlock the application. Synchronization
- * ordering established through this API is not visible to CUDA. CUDA tasks
- * that are (even indirectly) ordered by this API should also have that order
- * expressed with CUDA-visible dependencies such as events. This ensures that
- * the scheduler does not serialize them in an improper order. For more
- * information, see the Stream Memory Operations section in the programming
- * guide (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html).
- *
- * \param stream The stream to enqueue the operations in.
- * \param count The number of operations in the array. Must be less than 256.
- * \param paramArray The types and parameters of the individual operations.
- * \param flags Reserved for future expansion; must be 0.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \notefnerr
- *
- * \sa ::cuStreamWaitValue32,
- * ::cuStreamWaitValue64,
- * ::cuStreamWriteValue32,
- * ::cuStreamWriteValue64,
- * ::cuMemHostRegister
- */
- CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
- /** @} */ /* END CUDA_MEMOP */
- /**
- * \defgroup CUDA_EXEC Execution Control
- *
- * ___MANBRIEF___ execution control functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the execution control functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns information about a function
- *
- * Returns in \p *pi the integer value of the attribute \p attrib on the kernel
- * given by \p hfunc. The supported attributes are:
- * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
- * per block, beyond which a launch of the function would fail. This number
- * depends on both the function and the device on which the function is
- * currently loaded.
- * - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of
- * statically-allocated shared memory per block required by this function.
- * This does not include dynamically-allocated shared memory requested by
- * the user at runtime.
- * - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated
- * constant memory required by this function.
- * - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory
- * used by each thread of this function.
- * - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread
- * of this function.
- * - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for
- * which the function was compiled. This value is the major PTX version * 10
- * + the minor PTX version, so a PTX version 1.3 function would return the
- * value 13. Note that this may return the undefined value of 0 for cubins
- * compiled prior to CUDA 3.0.
- * - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for
- * which the function was compiled. This value is the major binary
- * version * 10 + the minor binary version, so a binary version 1.3 function
- * would return the value 13. Note that this will return a value of 10 for
- * legacy cubins that do not have a properly-encoded binary architecture
- * version.
- * - ::CU_FUNC_ATTRIBUTE_CACHE_MODE_CA: The attribute to indicate whether the function has
- * been compiled with user specified option "-Xptxas --dlcm=ca" set.
- * - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: The maximum size in bytes of
- * dynamically-allocated shared memory.
- * - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: Preferred shared memory-L1
- * cache split ratio in percent of total shared memory.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET: If this attribute is set, the
- * kernel must launch with a valid cluster size specified.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in
- * blocks.
- * - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether
- * the function can be launched with non-portable cluster size. 1 is allowed,
- * 0 is disallowed. A non-portable cluster size may only function on the
- * specific SKUs the program is tested on. The launch might fail if the
- * program is run on a different hardware platform. CUDA API provides
- * cudaOccupancyMaxActiveClusters to assist with checking whether the desired
- * size can be launched on the current device. A portable cluster size is
- * guaranteed to be functional on all compute capabilities higher than the
- * target compute capability. The portable cluster size for sm_90 is 8 blocks
- * per cluster. This value may increase for future compute capabilities. The
- * specific hardware unit may support higher cluster sizes that are not
- * guaranteed to be portable.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block
- * scheduling policy of a function. The value type is CUclusterSchedulingPolicy.
- *
- * With a few exceptions, function attributes may also be queried on unloaded
- * function handles returned from ::cuModuleEnumerateFunctions.
- * ::CUDA_ERROR_FUNCTION_NOT_LOADED is returned if the attribute requires a fully
- * loaded function but the function is not loaded. The loading state of a function
- * may be queried using ::cuFuncIsLoaded. ::cuFuncLoad may be called to explicitly
- * load a function before querying the following attributes that require the function
- * to be loaded:
- * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
- * - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
- * - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
- *
- * \param pi - Returned attribute value
- * \param attrib - Attribute requested
- * \param hfunc - Function to query attribute of
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_FUNCTION_NOT_LOADED
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuLaunchKernel,
- * ::cudaFuncGetAttributes,
- * ::cudaFuncSetAttribute,
- * ::cuFuncIsLoaded,
- * ::cuFuncLoad,
- * ::cuKernelGetAttribute
- */
- CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
- /**
- * \brief Sets information about a function
- *
- * This call sets the value of a specified attribute \p attrib on the kernel given
- * by \p hfunc to an integer value specified by \p value.
- * This function returns CUDA_SUCCESS if the new value of the attribute could be
- * successfully set. If the set fails, this call will return an error.
- * Not all attributes can have values set. Attempting to set a value on a read-only
- * attribute will result in an error (CUDA_ERROR_INVALID_VALUE).
- *
- * Supported attributes for the cuFuncSetAttribute call are:
- * - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: The maximum size in bytes of
- * dynamically-allocated shared memory. The value should contain the requested
- * maximum size of dynamically-allocated shared memory. The sum of this value and
- * the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the
- * device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN.
- * The maximal size of requestable dynamic shared memory may differ by GPU
- * architecture.
- * - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1
- * cache and shared memory use the same hardware resources, this sets the shared memory
- * carveout preference, in percent of the total shared memory.
- * See ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
- * This is only a hint, and the driver can choose a different ratio if required to execute the function.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in
- * blocks. The width, height, and depth values must either all be 0 or all be
- * positive. The validity of the cluster dimensions is checked at launch time.
- * If the value is set during compile time, it cannot be set at runtime.
- * Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
- * - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether
- * the function can be launched with non-portable cluster size. 1 is allowed,
- * 0 is disallowed.
- * - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block
- * scheduling policy of a function. The value type is CUclusterSchedulingPolicy.
- *
- * \param hfunc - Function to set attribute on
- * \param attrib - Attribute to set
- * \param value - The value to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuLaunchKernel,
- * ::cudaFuncGetAttributes,
- * ::cudaFuncSetAttribute,
- * ::cuKernelSetAttribute
- */
- CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
- /**
- * \brief Sets the preferred cache configuration for a device function
- *
- * On devices where the L1 cache and shared memory use the same hardware
- * resources, this sets through \p config the preferred cache configuration for
- * the device function \p hfunc. This is only a preference. The driver will use
- * the requested configuration if possible, but it is free to choose a different
- * configuration if required to execute \p hfunc. Any context-wide preference
- * set via ::cuCtxSetCacheConfig() will be overridden by this per-function
- * setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. In
- * that case, the current context-wide setting will be used.
- *
- * This setting does nothing on devices where the size of the L1 cache and
- * shared memory are fixed.
- *
- * Launching a kernel with a different preference than the most recent
- * preference setting may insert a device-side synchronization point.
- *
- *
- * The supported cache configurations are:
- * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
- * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
- * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
- * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
- *
- * \param hfunc - Kernel to configure cache for
- * \param config - Requested cache configuration
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cuLaunchKernel,
- * ::cudaFuncSetCacheConfig,
- * ::cuKernelSetCacheConfig
- */
- CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
- /**
- * \brief Returns a module handle
- *
- * Returns in \p *hmod the handle of the module that function \p hfunc
- * is located in. The lifetime of the module corresponds to the lifetime of
- * the context it was loaded in or until the module is explicitly unloaded.
- *
- * The CUDA runtime manages its own modules loaded into the primary context.
- * If the handle returned by this API refers to a module loaded by the CUDA runtime,
- * calling ::cuModuleUnload() on that module will result in undefined behavior.
- *
- * \param hmod - Returned module handle
- * \param hfunc - Function to retrieve module for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_FOUND
- * \notefnerr
- *
- */
- CUresult CUDAAPI cuFuncGetModule(CUmodule *hmod, CUfunction hfunc);
- /**
- * \brief Returns the function name for a ::CUfunction handle
- *
- * Returns in \p *name the function name associated with the function handle \p hfunc .
- * The function name is returned as a null-terminated string. The returned name is only
- * valid when the function handle is valid. If the module is unloaded or reloaded, one
- * must call the API again to get the updated name. This API may return a mangled name if
- * the function is not declared as having C linkage. If either \p name or \p hfunc
- * is NULL, ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \param name - The returned name of the function
- * \param hfunc - The function handle to retrieve the name for
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- */
- CUresult CUDAAPI cuFuncGetName(const char **name, CUfunction hfunc);
- /**
- * \brief Returns the offset and size of a kernel parameter in the device-side parameter layout
- *
- * Queries the kernel parameter at \p paramIndex into \p func's list of parameters, and returns
- * in \p paramOffset and \p paramSize the offset and size, respectively, where the parameter
- * will reside in the device-side parameter layout. This information can be used to update kernel
- * node parameters from the device via ::cudaGraphKernelNodeSetParam() and
- * ::cudaGraphKernelNodeUpdatesApply(). \p paramIndex must be less than the number of parameters
- * that \p func takes. \p paramSize can be set to NULL if only the parameter offset is desired.
- *
- * \param func - The function to query
- * \param paramIndex - The parameter index to query
- * \param paramOffset - Returns the offset into the device-side parameter layout at which the parameter resides
- * \param paramSize - Optionally returns the size of the parameter in the device-side parameter layout
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuKernelGetParamInfo
- */
- CUresult CUDAAPI cuFuncGetParamInfo(CUfunction func, size_t paramIndex, size_t *paramOffset, size_t *paramSize);
- typedef enum CUfunctionLoadingState_enum { /* Loading state of a function, as written to \p state by ::cuFuncIsLoaded */
- CU_FUNCTION_LOADING_STATE_UNLOADED = 0, /**< Function is not loaded */
- CU_FUNCTION_LOADING_STATE_LOADED = 1, /**< Function is loaded */
- CU_FUNCTION_LOADING_STATE_MAX /**< NOTE(review): presumably an upper-bound sentinel rather than a reportable state -- confirm */
- } CUfunctionLoadingState;
- /**
- * \brief Returns whether the function is loaded
- *
- * Returns in \p state the loading state of \p function.
- *
- * \param state - returned loading state
- * \param function - the function to check
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuFuncLoad,
- * ::cuModuleEnumerateFunctions
- */
- CUresult CUDAAPI cuFuncIsLoaded(CUfunctionLoadingState *state, CUfunction function);
- /**
- * \brief Loads a function
- *
- * Finalizes function loading for \p function. Calling this API with a
- * fully loaded function has no effect.
- *
- * \param function - the function to load
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuModuleEnumerateFunctions,
- * ::cuFuncIsLoaded
- */
- CUresult CUDAAPI cuFuncLoad(CUfunction function);
- /**
- * \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel
- *
- * Invokes the function ::CUfunction or the kernel ::CUkernel \p f
- * on a \p gridDimX x \p gridDimY x \p gridDimZ grid of blocks.
- * Each block contains \p blockDimX x \p blockDimY x
- * \p blockDimZ threads.
- *
- * \p sharedMemBytes sets the amount of dynamic shared memory that will be
- * available to each thread block.
- *
- * Kernel parameters to \p f can be specified in one of two ways:
- *
- * 1) Kernel parameters can be specified via \p kernelParams. If \p f
- * has N parameters, then \p kernelParams needs to be an array of N
- * pointers. Each of \p kernelParams[0] through \p kernelParams[N-1]
- * must point to a region of memory from which the actual kernel
- * parameter will be copied. The number of kernel parameters and their
- * offsets and sizes do not need to be specified as that information is
- * retrieved directly from the kernel's image.
- *
- * 2) Kernel parameters can also be packaged by the application into
- * a single buffer that is passed in via the \p extra parameter.
- * This places the burden on the application of knowing each kernel
- * parameter's size and alignment/padding within the buffer. Here is
- * an example of using the \p extra parameter in this manner:
- * \code
- size_t argBufferSize;
- char argBuffer[256];
- // populate argBuffer and argBufferSize
- void *config[] = {
- CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
- CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize,
- CU_LAUNCH_PARAM_END
- };
- status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config);
- * \endcode
- *
- * The \p extra parameter exists to allow ::cuLaunchKernel to take
- * additional less commonly used arguments. \p extra specifies a list of
- * names of extra settings and their corresponding values. Each extra
- * setting name is immediately followed by the corresponding value. The
- * list must be terminated with either NULL or ::CU_LAUNCH_PARAM_END.
- *
- * - ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra
- * array;
- * - ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next
- * value in \p extra will be a pointer to a buffer containing all
- * the kernel parameters for launching kernel \p f;
- * - ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next
- * value in \p extra will be a pointer to a size_t containing the
- * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER;
- *
- * The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel
- * parameters are specified with both \p kernelParams and \p extra
- * (i.e. both \p kernelParams and \p extra are non-NULL).
- *
- * Calling ::cuLaunchKernel() invalidates the persistent function state
- * set through the following deprecated APIs:
- * ::cuFuncSetBlockShape(),
- * ::cuFuncSetSharedSize(),
- * ::cuParamSetSize(),
- * ::cuParamSeti(),
- * ::cuParamSetf(),
- * ::cuParamSetv().
- *
- * Note that to use ::cuLaunchKernel(), the kernel \p f must either have
- * been compiled with toolchain version 3.2 or later so that it will
- * contain kernel parameter information, or have no kernel parameters.
- * If either of these conditions is not met, then ::cuLaunchKernel() will
- * return ::CUDA_ERROR_INVALID_IMAGE.
- *
- * Note that the API can also be used to launch a context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to launch
- * the kernel on will either be taken from the specified stream \p hStream
- * or be the current context in the case of a NULL stream.
- *
- * \param f - Function ::CUfunction or Kernel ::CUkernel to launch
- * \param gridDimX - Width of grid in blocks
- * \param gridDimY - Height of grid in blocks
- * \param gridDimZ - Depth of grid in blocks
- * \param blockDimX - X dimension of each thread block
- * \param blockDimY - Y dimension of each thread block
- * \param blockDimZ - Z dimension of each thread block
- * \param sharedMemBytes - Dynamic shared-memory size per thread block in bytes
- * \param hStream - Stream identifier
- * \param kernelParams - Array of pointers to kernel parameters
- * \param extra - Extra options
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_NOT_FOUND
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cudaLaunchKernel,
- * ::cuLibraryGetKernel,
- * ::cuKernelSetCacheConfig,
- * ::cuKernelGetAttribute,
- * ::cuKernelSetAttribute
- */
- CUresult CUDAAPI cuLaunchKernel(CUfunction f,
- unsigned int gridDimX,
- unsigned int gridDimY,
- unsigned int gridDimZ,
- unsigned int blockDimX,
- unsigned int blockDimY,
- unsigned int blockDimZ,
- unsigned int sharedMemBytes,
- CUstream hStream,
- void **kernelParams,
- void **extra);
- /**
- * \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel with launch-time configuration
- *
- * Invokes the function ::CUfunction or the kernel ::CUkernel \p f with the specified launch-time configuration
- * \p config.
- *
- * The ::CUlaunchConfig structure is defined as:
- *
- * \code
- * typedef struct CUlaunchConfig_st {
- * unsigned int gridDimX;
- * unsigned int gridDimY;
- * unsigned int gridDimZ;
- * unsigned int blockDimX;
- * unsigned int blockDimY;
- * unsigned int blockDimZ;
- * unsigned int sharedMemBytes;
- * CUstream hStream;
- * CUlaunchAttribute *attrs;
- * unsigned int numAttrs;
- * } CUlaunchConfig;
- * \endcode
- *
- * where:
- * - ::CUlaunchConfig::gridDimX is the width of the grid in blocks.
- * - ::CUlaunchConfig::gridDimY is the height of the grid in blocks.
- * - ::CUlaunchConfig::gridDimZ is the depth of the grid in blocks.
- * - ::CUlaunchConfig::blockDimX is the X dimension of each thread block.
- * - ::CUlaunchConfig::blockDimY is the Y dimension of each thread block.
- * - ::CUlaunchConfig::blockDimZ is the Z dimension of each thread block.
- * - ::CUlaunchConfig::sharedMemBytes is the dynamic shared-memory size per
- * thread block in bytes.
- * - ::CUlaunchConfig::hStream is the handle to the stream to perform the launch
- * in. The CUDA context associated with this stream must match that associated
- * with function f.
- * - ::CUlaunchConfig::attrs is an array of ::CUlaunchConfig::numAttrs
- * contiguous ::CUlaunchAttribute elements. The value of this pointer is not
- * considered if ::CUlaunchConfig::numAttrs is zero. However, in that case, it
- * is recommended to set the pointer to NULL.
- * - ::CUlaunchConfig::numAttrs is the number of attributes populating the
- * first ::CUlaunchConfig::numAttrs positions of the ::CUlaunchConfig::attrs
- * array.
- *
- * Launch-time configuration is specified by adding entries to
- * ::CUlaunchConfig::attrs. Each entry is an attribute ID and a corresponding
- * attribute value.
- *
- * The ::CUlaunchAttribute structure is defined as:
- * \code
- * typedef struct CUlaunchAttribute_st {
- * CUlaunchAttributeID id;
- * CUlaunchAttributeValue value;
- * } CUlaunchAttribute;
- * \endcode
- * where:
- * - ::CUlaunchAttribute::id is a unique enum identifying the attribute.
- * - ::CUlaunchAttribute::value is a union that holds the attribute value.
- *
- * An example of using the \p config parameter:
- * \code
- * CUlaunchAttribute coopAttr = {.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE,
- * .value = {.cooperative = 1}};
- * CUlaunchConfig config = {... // set block and grid dimensions
- * .attrs = &coopAttr,
- * .numAttrs = 1};
- *
- * cuLaunchKernelEx(&config, kernel, NULL, NULL);
- * \endcode
- *
- * The ::CUlaunchAttributeID enum is defined as:
- * \code
- * typedef enum CUlaunchAttributeID_enum {
- * CU_LAUNCH_ATTRIBUTE_IGNORE = 0,
- * CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
- * CU_LAUNCH_ATTRIBUTE_COOPERATIVE = 2,
- * CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3,
- * CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = 4,
- * CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5,
- * CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = 6,
- * CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = 7,
- * CU_LAUNCH_ATTRIBUTE_PRIORITY = 8,
- * CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9,
- * CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10,
- * CU_LAUNCH_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION = 11,
- * CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12,
- * CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13,
- * } CUlaunchAttributeID;
- * \endcode
- *
- * and the corresponding ::CUlaunchAttributeValue union as :
- * \code
- * typedef union CUlaunchAttributeValue_union {
- * CUaccessPolicyWindow accessPolicyWindow;
- * int cooperative;
- * CUsynchronizationPolicy syncPolicy;
- * struct {
- * unsigned int x;
- * unsigned int y;
- * unsigned int z;
- * } clusterDim;
- * CUclusterSchedulingPolicy clusterSchedulingPolicyPreference;
- * int programmaticStreamSerializationAllowed;
- * struct {
- * CUevent event;
- * int flags;
- * int triggerAtBlockStart;
- * } programmaticEvent;
- * int priority;
- * CUlaunchMemSyncDomainMap memSyncDomainMap;
- * CUlaunchMemSyncDomain memSyncDomain;
- * struct {
- * unsigned int x;
- * unsigned int y;
- * unsigned int z;
- * } preferredClusterDim;
- * struct {
- * CUevent event;
- * int flags;
- * } launchCompletionEvent;
- * struct {
- * int deviceUpdatable;
- * CUgraphDeviceNode devNode;
- * } deviceUpdatableKernelNode;
- * } CUlaunchAttributeValue;
- * \endcode
- *
- * Setting ::CU_LAUNCH_ATTRIBUTE_COOPERATIVE to a non-zero value causes the
- * kernel launch to be a cooperative launch, with exactly the same usage and
- * semantics of ::cuLaunchCooperativeKernel.
- *
- * Setting ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION to a non-zero
- * value causes the kernel to use programmatic means to resolve its stream
- * dependency -- enabling the CUDA runtime to opportunistically allow the grid's
- * execution to overlap with the previous kernel in the stream, if that kernel
- * requests the overlap.
- *
- * ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT records an event along with the
- * kernel launch. An event recorded through this launch attribute is guaranteed to
- * only trigger after all blocks in the associated kernel trigger the event. A
- * block can trigger the event through PTX launchdep.release or CUDA builtin
- * function cudaTriggerProgrammaticLaunchCompletion(). A trigger can also be
- * inserted at the beginning of each block's execution if triggerAtBlockStart is
- * set to non-0. Note that dependents (including the CPU thread calling
- * cuEventSynchronize()) are not guaranteed to observe the release precisely
- * when it is released. For example, cuEventSynchronize() may only observe the
- * event trigger long after the associated kernel has completed. This recording
- * type is primarily meant for establishing programmatic dependency between
- * device tasks. The event supplied must not be an interprocess or interop
- * event. The event must disable timing (i.e. created with
- * ::CU_EVENT_DISABLE_TIMING flag set).
- *
- * ::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT records an event along with
- * the kernel launch. Nominally, the event is triggered once all blocks of the
- * kernel have begun execution. Currently this is a best effort. If a kernel B
- * has a launch completion dependency on a kernel A, B may wait until A is
- * complete. Alternatively, blocks of B may begin before all blocks of A have
- * begun, for example:
- *
- * - If B can claim execution resources unavailable to A, for example if they
- * run on different GPUs.
- * - If B is a higher priority than A.
- *
- * Exercise caution if such an ordering inversion could lead to deadlock. The
- * event supplied must not be an interprocess or interop event. The event must
- * disable timing (i.e. must be created with the ::CU_EVENT_DISABLE_TIMING flag
- * set).
- *
- * Setting ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE to 1
- * on a captured launch causes the resulting kernel node to be device-updatable.
- * This attribute is specific to graphs, and passing it to a launch in a
- * non-capturing stream results in an error. Passing a value other than 0 or 1 is
- * not allowed.
- *
- * On success, a handle will be returned via
- * ::CUlaunchAttributeValue::deviceUpdatableKernelNode::devNode which can be passed
- * to the various device-side update functions to update the node's kernel parameters
- * from within another kernel. For more information on the types of device updates
- * that can be made, as well as the relevant limitations thereof, see
- * ::cudaGraphKernelNodeUpdatesApply.
- *
- * Kernel nodes which are device-updatable have additional restrictions compared to regular
- * kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via
- * ::cuGraphDestroyNode. Additionally, once opted-in to this functionality, a node cannot
- * opt out, and any attempt to set the attribute to 0 will result in an error. Graphs
- * containing one or more device-updatable nodes also do not allow multiple instantiation.
- *
- * ::CU_LAUNCH_ATTRIBUTE_PREFERRED_CLUSTER_DIMENSION allows the kernel launch to
- * specify a preferred substitute cluster dimension. Blocks may be grouped
- * according to either the dimensions specified with this attribute (grouped
- * into a "preferred substitute cluster"), or the one specified with
- * ::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION attribute (grouped into a "regular
- * cluster"). The cluster dimensions of a "preferred substitute cluster" shall
- * be an integer multiple greater than zero of the regular cluster dimensions.
- * The device will attempt - on a best-effort basis - to group thread blocks
- * into preferred clusters over grouping them into regular clusters. When it
- * deems necessary (primarily when the device temporarily runs out of physical
- * resources to launch the larger preferred clusters), the device may switch to
- * launch the regular clusters instead to attempt to utilize as much of the
- * physical device resources as possible.
- *
- * Each type of cluster will have its enumeration / coordinate setup as if the
- * grid consists solely of its type of cluster. For example, if the preferred
- * substitute cluster dimensions double the regular cluster dimensions, there
- * might be simultaneously a regular cluster indexed at (1,0,0), and a preferred
- * cluster indexed at (1,0,0). In this example, the preferred substitute cluster
- * (1,0,0) replaces regular clusters (2,0,0) and (3,0,0) and groups their
- * blocks.
- *
- * This attribute will only take effect when a regular cluster dimension has
- * been specified. The preferred substitute cluster
- * dimension must be an integer multiple greater than zero of the regular
- * cluster dimension and must divide the grid. It must also be no more than
- * `maxBlocksPerCluster`, if it is set in the kernel's `__launch_bounds__`.
- * Otherwise it must be less than the maximum value the driver can support.
- * Otherwise, setting this attribute to a value physically unable to fit on any
- * particular device is permitted.
- *
- * The effect of other attributes is consistent with their effect when set via
- * persistent APIs.
- *
- * See ::cuStreamSetAttribute for
- * - ::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW
- * - ::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY
- *
- * See ::cuFuncSetAttribute for
- * - ::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
- * - ::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
- *
- * Kernel parameters to \p f can be specified in the same ways that they can be
- * using ::cuLaunchKernel.
- *
- * Note that the API can also be used to launch context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to launch
- * the kernel on will either be taken from the specified stream ::CUlaunchConfig::hStream
- * or the current context in case of NULL stream.
- *
- * \param config - Config to launch
- * \param f - Function ::CUfunction or Kernel ::CUkernel to launch
- * \param kernelParams - Array of pointers to kernel parameters
- * \param extra - Extra options
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_NOT_FOUND
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cudaLaunchKernel,
- * ::cudaLaunchKernelEx,
- * ::cuLibraryGetKernel,
- * ::cuKernelSetCacheConfig,
- * ::cuKernelGetAttribute,
- * ::cuKernelSetAttribute
- */
- CUresult CUDAAPI cuLaunchKernelEx(const CUlaunchConfig *config,
- CUfunction f,
- void **kernelParams,
- void **extra);
- /**
- * \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel where thread blocks
- * can cooperate and synchronize as they execute
- *
- * Invokes the function ::CUfunction or the kernel ::CUkernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ
- * grid of blocks. Each block contains \p blockDimX x \p blockDimY x
- * \p blockDimZ threads.
- *
- * \p sharedMemBytes sets the amount of dynamic shared memory that will be
- * available to each thread block.
- *
- * The device on which this kernel is invoked must have a non-zero value for
- * the device attribute ::CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH.
- *
- * The total number of blocks launched cannot exceed the maximum number of blocks per
- * multiprocessor as returned by ::cuOccupancyMaxActiveBlocksPerMultiprocessor (or
- * ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number of multiprocessors
- * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
- *
- * The kernel cannot make use of CUDA dynamic parallelism.
- *
- * Kernel parameters must be specified via \p kernelParams. If \p f
- * has N parameters, then \p kernelParams needs to be an array of N
- * pointers. Each of \p kernelParams[0] through \p kernelParams[N-1]
- * must point to a region of memory from which the actual kernel
- * parameter will be copied. The number of kernel parameters and their
- * offsets and sizes do not need to be specified as that information is
- * retrieved directly from the kernel's image.
- *
- * Calling ::cuLaunchCooperativeKernel() sets persistent function state that is
- * the same as the function state set through the ::cuLaunchKernel API.
- *
- * When the kernel \p f is launched via ::cuLaunchCooperativeKernel(), the previous
- * block shape, shared size and parameter info associated with \p f
- * is overwritten.
- *
- * Note that to use ::cuLaunchCooperativeKernel(), the kernel \p f must either have
- * been compiled with toolchain version 3.2 or later so that it will
- * contain kernel parameter information, or have no kernel parameters.
- * If neither of these conditions is met, then ::cuLaunchCooperativeKernel() will
- * return ::CUDA_ERROR_INVALID_IMAGE.
- *
- * Note that the API can also be used to launch context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to launch
- * the kernel on will either be taken from the specified stream \p hStream
- * or the current context in case of NULL stream.
- *
- * \param f - Function ::CUfunction or Kernel ::CUkernel to launch
- * \param gridDimX - Width of grid in blocks
- * \param gridDimY - Height of grid in blocks
- * \param gridDimZ - Depth of grid in blocks
- * \param blockDimX - X dimension of each thread block
- * \param blockDimY - Y dimension of each thread block
- * \param blockDimZ - Z dimension of each thread block
- * \param sharedMemBytes - Dynamic shared-memory size per thread block in bytes
- * \param hStream - Stream identifier
- * \param kernelParams - Array of pointers to kernel parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
- * ::CUDA_ERROR_NOT_FOUND
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cuLaunchCooperativeKernelMultiDevice,
- * ::cudaLaunchCooperativeKernel,
- * ::cuLibraryGetKernel,
- * ::cuKernelSetCacheConfig,
- * ::cuKernelGetAttribute,
- * ::cuKernelSetAttribute
- */
- CUresult CUDAAPI cuLaunchCooperativeKernel(CUfunction f,
- unsigned int gridDimX,
- unsigned int gridDimY,
- unsigned int gridDimZ,
- unsigned int blockDimX,
- unsigned int blockDimY,
- unsigned int blockDimZ,
- unsigned int sharedMemBytes,
- CUstream hStream,
- void **kernelParams);
- /**
- * \brief Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute
- *
- * \deprecated This function is deprecated as of CUDA 11.3.
- *
- * Invokes kernels as specified in the \p launchParamsList array where each element
- * of the array specifies all the parameters required to perform a single kernel launch.
- * These kernels can cooperate and synchronize as they execute. The size of the array is
- * specified by \p numDevices.
- *
- * No two kernels can be launched on the same device. All the devices targeted by this
- * multi-device launch must be identical. All devices must have a non-zero value for the
- * device attribute ::CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH.
- *
- * All kernels launched must be identical with respect to the compiled code. Note that
- * any __device__, __constant__ or __managed__ variables present in the module that owns
- * the kernel launched on each device, are independently instantiated on every device.
- * It is the application's responsibility to ensure these variables are initialized and
- * used appropriately.
- *
- * The size of the grids as specified in blocks, the size of the blocks themselves
- * and the amount of shared memory used by each thread block must also match across
- * all launched kernels.
- *
- * The streams used to launch these kernels must have been created via either ::cuStreamCreate
- * or ::cuStreamCreateWithPriority. The NULL stream or ::CU_STREAM_LEGACY or ::CU_STREAM_PER_THREAD
- * cannot be used.
- *
- * The total number of blocks launched per kernel cannot exceed the maximum number of blocks
- * per multiprocessor as returned by ::cuOccupancyMaxActiveBlocksPerMultiprocessor (or
- * ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number of multiprocessors
- * as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. Since the
- * total number of blocks launched per device has to match across all devices, the maximum
- * number of blocks that can be launched per device will be limited by the device with the
- * least number of multiprocessors.
- *
- * The kernels cannot make use of CUDA dynamic parallelism.
- *
- * The ::CUDA_LAUNCH_PARAMS structure is defined as:
- * \code
- typedef struct CUDA_LAUNCH_PARAMS_st
- {
- CUfunction function;
- unsigned int gridDimX;
- unsigned int gridDimY;
- unsigned int gridDimZ;
- unsigned int blockDimX;
- unsigned int blockDimY;
- unsigned int blockDimZ;
- unsigned int sharedMemBytes;
- CUstream hStream;
- void **kernelParams;
- } CUDA_LAUNCH_PARAMS;
- * \endcode
- * where:
- * - ::CUDA_LAUNCH_PARAMS::function specifies the kernel to be launched. All functions must
- * be identical with respect to the compiled code.
- * Note that you can also specify context-less kernel ::CUkernel by querying the handle
- * using ::cuLibraryGetKernel() and then casting to ::CUfunction. In this case, the context to
- * launch the kernel on will be taken from the specified stream ::CUDA_LAUNCH_PARAMS::hStream.
- * - ::CUDA_LAUNCH_PARAMS::gridDimX is the width of the grid in blocks. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::gridDimY is the height of the grid in blocks. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::gridDimZ is the depth of the grid in blocks. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::blockDimX is the X dimension of each thread block. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::blockDimY is the Y dimension of each thread block. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::blockDimZ is the Z dimension of each thread block. This must match across
- * all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::sharedMemBytes is the dynamic shared-memory size per thread block in bytes.
- * This must match across all kernels launched.
- * - ::CUDA_LAUNCH_PARAMS::hStream is the handle to the stream to perform the launch in. This cannot
- * be the NULL stream or ::CU_STREAM_LEGACY or ::CU_STREAM_PER_THREAD. The CUDA context associated
- * with this stream must match that associated with ::CUDA_LAUNCH_PARAMS::function.
- * - ::CUDA_LAUNCH_PARAMS::kernelParams is an array of pointers to kernel parameters. If
- * ::CUDA_LAUNCH_PARAMS::function has N parameters, then ::CUDA_LAUNCH_PARAMS::kernelParams
- * needs to be an array of N pointers. Each of ::CUDA_LAUNCH_PARAMS::kernelParams[0] through
- * ::CUDA_LAUNCH_PARAMS::kernelParams[N-1] must point to a region of memory from which the actual
- * kernel parameter will be copied. The number of kernel parameters and their offsets and sizes
- * do not need to be specified as that information is retrieved directly from the kernel's image.
- *
- * By default, the kernel won't begin execution on any GPU until all prior work in all the specified
- * streams has completed. This behavior can be overridden by specifying the flag
- * ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC. When this flag is specified, each kernel
- * will only wait for prior work in the stream corresponding to that GPU to complete before it begins
- * execution.
- *
- * Similarly, by default, any subsequent work pushed in any of the specified streams will not begin
- * execution until the kernels on all GPUs have completed. This behavior can be overridden by specifying
- * the flag ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC. When this flag is specified,
- * any subsequent work pushed in any of the specified streams will only wait for the kernel launched
- * on the GPU corresponding to that stream to complete before it begins execution.
- *
- * Calling ::cuLaunchCooperativeKernelMultiDevice() sets persistent function state that is
- * the same as function state set through ::cuLaunchKernel API when called individually for each
- * element in \p launchParamsList.
- *
- * When kernels are launched via ::cuLaunchCooperativeKernelMultiDevice(), the previous
- * block shape, shared size and parameter info associated with each ::CUDA_LAUNCH_PARAMS::function
- * in \p launchParamsList is overwritten.
- *
- * Note that to use ::cuLaunchCooperativeKernelMultiDevice(), the kernels must either have
- * been compiled with toolchain version 3.2 or later so that they will
- * contain kernel parameter information, or have no kernel parameters.
- * If neither of these conditions is met, then ::cuLaunchCooperativeKernelMultiDevice() will
- * return ::CUDA_ERROR_INVALID_IMAGE.
- *
- * \param launchParamsList - List of launch parameters, one per device
- * \param numDevices - Size of the \p launchParamsList array
- * \param flags - Flags to control launch behavior
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_IMAGE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cuLaunchCooperativeKernel,
- * ::cudaLaunchCooperativeKernelMultiDevice
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, unsigned int flags);
- /**
- * \brief Enqueues a host function call in a stream
- *
- * Enqueues a host function to run in a stream. The function will be called
- * after currently enqueued work and will block work added after it.
- *
- * The host function must not make any CUDA API calls. Attempting to use a
- * CUDA API may result in ::CUDA_ERROR_NOT_PERMITTED, but this is not required.
- * The host function must not perform any synchronization that may depend on
- * outstanding CUDA work not mandated to run earlier. Host functions without a
- * mandated order (such as in independent streams) execute in undefined order
- * and may be serialized.
- *
- * For the purposes of Unified Memory, execution makes a number of guarantees:
- * <ul>
- * <li>The stream is considered idle for the duration of the function's
- * execution. Thus, for example, the function may always use memory attached
- * to the stream it was enqueued in.</li>
- * <li>The start of execution of the function has the same effect as
- * synchronizing an event recorded in the same stream immediately prior to
- * the function. It thus synchronizes streams which have been "joined"
- * prior to the function.</li>
- * <li>Adding device work to any stream does not have the effect of making
- * the stream active until all preceding host functions and stream callbacks
- * have executed. Thus, for
- * example, a function might use global attached memory even if work has
- * been added to another stream, if the work has been ordered behind the
- * function call with an event.</li>
- * <li>Completion of the function does not cause a stream to become
- * active except as described above. The stream will remain idle
- * if no device work follows the function, and will remain idle across
- * consecutive host functions or stream callbacks without device work in
- * between. Thus, for example,
- * stream synchronization can be done by signaling from a host function at the
- * end of the stream.</li>
- * </ul>
- *
- * Note that, in contrast to ::cuStreamAddCallback, the function will not be
- * called in the event of an error in the CUDA context.
- *
- * \param hStream - Stream to enqueue function call in
- * \param fn - The function to call once preceding stream operations are complete
- * \param userData - User-specified data to be passed to the function
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuStreamCreate,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamWaitEvent,
- * ::cuStreamDestroy,
- * ::cuMemAllocManaged,
- * ::cuStreamAttachMemAsync,
- * ::cuStreamAddCallback
- */
- CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void *userData);
- /** @} */ /* END CUDA_EXEC */
- /**
- * \defgroup CUDA_EXEC_DEPRECATED Execution Control [DEPRECATED]
- *
- * ___MANBRIEF___ deprecated execution control functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the deprecated execution control functions of the
- * low-level CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Sets the block-dimensions for the function
- *
- * \deprecated
- *
- * Specifies the \p x, \p y, and \p z dimensions of the thread blocks that are
- * created when the kernel given by \p hfunc is launched.
- *
- * \param hfunc - Kernel to specify dimensions of
- * \param x - X dimension
- * \param y - Y dimension
- * \param z - Z dimension
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetSharedSize,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSeti,
- * ::cuParamSetf,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
- /**
- * \brief Sets the dynamic shared-memory size for the function
- *
- * \deprecated
- *
- * Sets through \p bytes the amount of dynamic shared memory that will be
- * available to each thread block when the kernel given by \p hfunc is launched.
- *
- * \param hfunc - Kernel to specify dynamic shared-memory size for
- * \param bytes - Dynamic shared-memory size per thread block in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetCacheConfig,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSeti,
- * ::cuParamSetf,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
- /**
- * \brief Sets the parameter size for the function
- *
- * \deprecated
- *
- * Sets through \p numbytes the total size in bytes needed by the function
- * parameters of the kernel corresponding to \p hfunc.
- *
- * \param hfunc - Kernel to set parameter size for
- * \param numbytes - Size of parameter list in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetf,
- * ::cuParamSeti,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
- /**
- * \brief Adds an integer parameter to the function's argument list
- *
- * \deprecated
- *
- * Sets an integer parameter that will be specified the next time the
- * kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
- *
- * \param hfunc - Kernel to add parameter to
- * \param offset - Offset to add parameter to argument list
- * \param value - Value of parameter
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSetf,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, unsigned int value);
- /**
- * \brief Adds a floating-point parameter to the function's argument list
- *
- * \deprecated
- *
- * Sets a floating-point parameter that will be specified the next time the
- * kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
- *
- * \param hfunc - Kernel to add parameter to
- * \param offset - Offset to add parameter to argument list
- * \param value - Value of parameter
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSeti,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, float value);
- /**
- * \brief Adds arbitrary data to the function's argument list
- *
- * \deprecated
- *
- * Copies an arbitrary amount of data (specified in \p numbytes) from \p ptr
- * into the parameter space of the kernel corresponding to \p hfunc. \p offset
- * is a byte offset.
- *
- * \param hfunc - Kernel to add data to
- * \param offset - Offset to add data to argument list
- * \param ptr - Pointer to arbitrary data
- * \param numbytes - Size of data to copy in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSetf,
- * ::cuParamSeti,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
- /**
- * \brief Launches a CUDA function
- *
- * \deprecated
- *
- * Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block
- * contains the number of threads specified by a previous call to
- * ::cuFuncSetBlockShape().
- *
- * The block shape, dynamic shared memory size, and parameter information
- * must be set using
- * ::cuFuncSetBlockShape(),
- * ::cuFuncSetSharedSize(),
- * ::cuParamSetSize(),
- * ::cuParamSeti(),
- * ::cuParamSetf(), and
- * ::cuParamSetv()
- * prior to calling this function.
- *
- * Launching a function via ::cuLaunchKernel() invalidates the function's
- * block shape, dynamic shared memory size, and parameter information. After
- * launching via cuLaunchKernel, this state must be re-initialized prior to
- * calling this function. Failure to do so results in undefined behavior.
- *
- * \param f - Kernel to launch
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSetf,
- * ::cuParamSeti,
- * ::cuParamSetv,
- * ::cuLaunchGrid,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f);
- /**
- * \brief Launches a CUDA function
- *
- * \deprecated
- *
- * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
- * blocks. Each block contains the number of threads specified by a previous
- * call to ::cuFuncSetBlockShape().
- *
- * The block shape, dynamic shared memory size, and parameter information
- * must be set using
- * ::cuFuncSetBlockShape(),
- * ::cuFuncSetSharedSize(),
- * ::cuParamSetSize(),
- * ::cuParamSeti(),
- * ::cuParamSetf(), and
- * ::cuParamSetv()
- * prior to calling this function.
- *
- * Launching a function via ::cuLaunchKernel() invalidates the function's
- * block shape, dynamic shared memory size, and parameter information. After
- * launching via cuLaunchKernel, this state must be re-initialized prior to
- * calling this function. Failure to do so results in undefined behavior.
- *
- * \param f - Kernel to launch
- * \param grid_width - Width of grid in blocks
- * \param grid_height - Height of grid in blocks
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSetf,
- * ::cuParamSeti,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGridAsync,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
- /**
- * \brief Launches a CUDA function
- *
- * \deprecated
- *
- * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
- * blocks. Each block contains the number of threads specified by a previous
- * call to ::cuFuncSetBlockShape().
- *
- * The block shape, dynamic shared memory size, and parameter information
- * must be set using
- * ::cuFuncSetBlockShape(),
- * ::cuFuncSetSharedSize(),
- * ::cuParamSetSize(),
- * ::cuParamSeti(),
- * ::cuParamSetf(), and
- * ::cuParamSetv()
- * prior to calling this function.
- *
- * Launching a function via ::cuLaunchKernel() invalidates the function's
- * block shape, dynamic shared memory size, and parameter information. After
- * launching via cuLaunchKernel, this state must be re-initialized prior to
- * calling this function. Failure to do so results in undefined behavior.
- *
- * \param f - Kernel to launch
- * \param grid_width - Width of grid in blocks
- * \param grid_height - Height of grid in blocks
- * \param hStream - Stream identifier
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_LAUNCH_FAILED,
- * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
- * ::CUDA_ERROR_LAUNCH_TIMEOUT,
- * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
- * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
- *
- * \note In certain cases where cubins are created with no ABI (i.e., using \p ptxas \p --abi-compile \p no),
- * this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by
- * growing the per-thread stack as needed per launch and not shrinking it afterwards.
- *
- * \note_null_stream
- * \notefnerr
- *
- * \sa ::cuFuncSetBlockShape,
- * ::cuFuncSetSharedSize,
- * ::cuFuncGetAttribute,
- * ::cuParamSetSize,
- * ::cuParamSetf,
- * ::cuParamSeti,
- * ::cuParamSetv,
- * ::cuLaunch,
- * ::cuLaunchGrid,
- * ::cuLaunchKernel
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
- /**
- * \brief Adds a texture-reference to the function's argument list
- *
- * \deprecated
- *
- * Makes the CUDA array or linear memory bound to the texture reference
- * \p hTexRef available to a device program as a texture. In this version of
- * CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() and
- * the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT.
- *
- * \param hfunc - Kernel to add texture-reference to
- * \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT)
- * \param hTexRef - Texture-reference to add to argument list
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
- /**
- * \brief Sets the shared memory configuration for a device function.
- *
- * \deprecated
- *
- * On devices with configurable shared memory banks, this function will
- * force all subsequent launches of the specified device function to have
- * the given shared memory bank size configuration. On any given launch of the
- * function, the shared memory configuration of the device will be temporarily
- * changed if needed to suit the function's preferred configuration. Changes in
- * shared memory configuration between subsequent launches of functions
- * may introduce a device-side synchronization point.
- *
- * Any per-function setting of shared memory bank size set via
- * ::cuFuncSetSharedMemConfig will override the context wide setting set with
- * ::cuCtxSetSharedMemConfig.
- *
- * Changing the shared memory bank size will not increase shared memory usage
- * or affect occupancy of kernels, but may have major effects on performance.
- * Larger bank sizes will allow for greater potential bandwidth to shared memory,
- * but will change what kinds of accesses to shared memory will result in bank
- * conflicts.
- *
- * This function will do nothing on devices with fixed shared memory bank size.
- *
- * The supported bank configurations are:
- * - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: use the context's shared memory
- * configuration when launching this function.
- * - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to
- * be natively four bytes when launching this function.
- * - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to
- * be natively eight bytes when launching this function.
- *
- * \param hfunc - kernel to be given a shared memory config
- * \param config - requested shared memory configuration
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa ::cuCtxGetCacheConfig,
- * ::cuCtxSetCacheConfig,
- * ::cuCtxGetSharedMemConfig,
- * ::cuCtxSetSharedMemConfig,
- * ::cuFuncGetAttribute,
- * ::cuLaunchKernel,
- * ::cudaFuncSetSharedMemConfig
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
- /** @} */ /* END CUDA_EXEC_DEPRECATED */
- /**
- * \defgroup CUDA_GRAPH Graph Management
- *
- * ___MANBRIEF___ graph management functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the graph management functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Creates a graph
- *
- * Creates an empty graph, which is returned via \p phGraph.
- *
- * \param phGraph - Returns newly created graph
- * \param flags - Graph creation flags, must be 0
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode,
- * ::cuGraphInstantiate,
- * ::cuGraphDestroy,
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphClone
- */
- CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags);
- /**
- * \brief Creates a kernel execution node and adds it to a graph
- *
- * Creates a new kernel execution node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * The CUDA_KERNEL_NODE_PARAMS structure is defined as:
- *
- * \code
- * typedef struct CUDA_KERNEL_NODE_PARAMS_st {
- * CUfunction func;
- * unsigned int gridDimX;
- * unsigned int gridDimY;
- * unsigned int gridDimZ;
- * unsigned int blockDimX;
- * unsigned int blockDimY;
- * unsigned int blockDimZ;
- * unsigned int sharedMemBytes;
- * void **kernelParams;
- * void **extra;
- * CUkernel kern;
- * CUcontext ctx;
- * } CUDA_KERNEL_NODE_PARAMS;
- * \endcode
- *
- * When the graph is launched, the node will invoke kernel \p func on a (\p gridDimX x
- * \p gridDimY x \p gridDimZ) grid of blocks. Each block contains
- * (\p blockDimX x \p blockDimY x \p blockDimZ) threads.
- *
- * \p sharedMemBytes sets the amount of dynamic shared memory that will be
- * available to each thread block.
- *
- * Kernel parameters to \p func can be specified in one of two ways:
- *
- * 1) Kernel parameters can be specified via \p kernelParams. If the kernel has N
- * parameters, then \p kernelParams needs to be an array of N pointers. Each pointer,
- * from \p kernelParams[0] to \p kernelParams[N-1], points to the region of memory from which the actual
- * parameter will be copied. The number of kernel parameters and their offsets and sizes do not need
- * to be specified as that information is retrieved directly from the kernel's image.
- *
- * 2) Kernel parameters for non-cooperative kernels can also be packaged by the application into a single
- * buffer that is passed in via \p extra. This places the burden on the application of knowing each
- * kernel parameter's size and alignment/padding within the buffer. The \p extra parameter exists
- * to allow this function to take additional less commonly used arguments. \p extra specifies
- * a list of names of extra settings and their corresponding values. Each extra setting name is
- * immediately followed by the corresponding value. The list must be terminated with either NULL or
- * CU_LAUNCH_PARAM_END.
- *
- * - ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra
- * array;
- * - ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next
- * value in \p extra will be a pointer to a buffer
- * containing all the kernel parameters for launching kernel
- * \p func;
- * - ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next
- * value in \p extra will be a pointer to a size_t
- * containing the size of the buffer specified with
- * ::CU_LAUNCH_PARAM_BUFFER_POINTER;
- *
- * The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters are specified with both
- * \p kernelParams and \p extra (i.e. both \p kernelParams and \p extra are non-NULL).
- * ::CUDA_ERROR_INVALID_VALUE will be returned if \p extra is used for a cooperative kernel.
- *
- * The \p kernelParams or \p extra array, as well as the argument values it points to,
- * are copied during this call.
- *
- * \note Kernels launched using graphs must not use texture and surface references. Reading or
- * writing through any texture or surface reference is undefined behavior.
- * This restriction does not apply to texture and surface objects.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the GPU execution node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuLaunchKernel,
- * ::cuLaunchCooperativeKernel,
- * ::cuGraphKernelNodeGetParams,
- * ::cuGraphKernelNodeSetParams,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddKernelNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns a kernel node's parameters
- *
- * Returns the parameters of kernel node \p hNode in \p nodeParams.
- * The \p kernelParams or \p extra array returned in \p nodeParams,
- * as well as the argument values it points to, are owned by the node.
- * This memory remains valid until the node is destroyed or its
- * parameters are modified, and should not be modified
- * directly. Use ::cuGraphKernelNodeSetParams to update the
- * parameters of this node.
- *
- * The params will contain either \p kernelParams or \p extra,
- * according to which of these was most recently set on the node.
- *
- * \param hNode - Node to get the parameters for
- * \param nodeParams - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuLaunchKernel,
- * ::cuGraphAddKernelNode,
- * ::cuGraphKernelNodeSetParams
- */
- CUresult CUDAAPI cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets a kernel node's parameters
- *
- * Sets the parameters of kernel node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuLaunchKernel,
- * ::cuGraphAddKernelNode,
- * ::cuGraphKernelNodeGetParams
- */
- CUresult CUDAAPI cuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates a memcpy node and adds it to a graph
- *
- * Creates a new memcpy node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * When the graph is launched, the node will perform the memcpy described by \p copyParams.
- * See ::cuMemcpy3D() for a description of the structure and its restrictions.
- *
- * Memcpy nodes have some additional restrictions with regard to managed memory if the
- * system contains at least one device which has a zero value for the device attribute
- * ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer
- * to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed
- * for those operand(s). The managed memory will be treated as residing on either the
- * host or the device, depending on which memory type is specified.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param copyParams - Parameters for the memory copy
- * \param ctx - Context on which to run the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuMemcpy3D,
- * ::cuGraphMemcpyNodeGetParams,
- * ::cuGraphMemcpyNodeSetParams,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMCPY3D *copyParams, CUcontext ctx);
- /**
- * \brief Returns a memcpy node's parameters
- *
- * Returns the parameters of memcpy node \p hNode in \p nodeParams.
- *
- * \param hNode - Node to get the parameters for
- * \param nodeParams - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuMemcpy3D,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphMemcpyNodeSetParams
- */
- CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams);
- /**
- * \brief Sets a memcpy node's parameters
- *
- * Sets the parameters of memcpy node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuMemcpy3D,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphMemcpyNodeGetParams
- */
- CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams);
- /**
- * \brief Creates a memset node and adds it to a graph
- *
- * Creates a new memset node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * The element size must be 1, 2, or 4 bytes.
- * When the graph is launched, the node will perform the memset described by \p memsetParams.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param memsetParams - Parameters for the memory set
- * \param ctx - Context on which to run the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuMemsetD2D32,
- * ::cuGraphMemsetNodeGetParams,
- * ::cuGraphMemsetNodeSetParams,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode
- */
- CUresult CUDAAPI cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx);
- /**
- * \brief Returns a memset node's parameters
- *
- * Returns the parameters of memset node \p hNode in \p nodeParams.
- *
- * \param hNode - Node to get the parameters for
- * \param nodeParams - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuMemsetD2D32,
- * ::cuGraphAddMemsetNode,
- * ::cuGraphMemsetNodeSetParams
- */
- CUresult CUDAAPI cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets a memset node's parameters
- *
- * Sets the parameters of memset node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuMemsetD2D32,
- * ::cuGraphAddMemsetNode,
- * ::cuGraphMemsetNodeGetParams
- */
- CUresult CUDAAPI cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates a host execution node and adds it to a graph
- *
- * Creates a new CPU execution node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * When the graph is launched, the node will invoke the specified CPU function.
- * Host nodes are not supported under MPS with pre-Volta GPUs.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the host node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuLaunchHostFunc,
- * ::cuGraphHostNodeGetParams,
- * ::cuGraphHostNodeSetParams,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns a host node's parameters
- *
- * Returns the parameters of host node \p hNode in \p nodeParams.
- *
- * \param hNode - Node to get the parameters for
- * \param nodeParams - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuLaunchHostFunc,
- * ::cuGraphAddHostNode,
- * ::cuGraphHostNodeSetParams
- */
- CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets a host node's parameters
- *
- * Sets the parameters of host node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuLaunchHostFunc,
- * ::cuGraphAddHostNode,
- * ::cuGraphHostNodeGetParams
- */
- CUresult CUDAAPI cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates a child graph node and adds it to a graph
- *
- * Creates a new node which executes an embedded graph, and adds it to \p hGraph with
- * \p numDependencies dependencies specified via \p dependencies.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * If \p childGraph contains allocation or free nodes, this call will return an error.
- *
- * The node executes an embedded child graph. The child graph is cloned in this call.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param childGraph - The graph to clone into this node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphChildGraphNodeGetGraph,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode,
- * ::cuGraphClone
- */
- CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUgraph childGraph);
- /**
- * \brief Gets a handle to the embedded graph of a child graph node
- *
- * Gets a handle to the embedded graph in a child graph node. This call
- * does not clone the graph. Changes to the graph will be reflected in
- * the node, and the node retains ownership of the graph.
- *
- * Allocation and free nodes cannot be added to the returned graph.
- * Attempting to do so will return an error.
- *
- * \param hNode - Node to get the embedded graph for
- * \param phGraph - Location to store a handle to the graph
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphNodeFindInClone
- */
- CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph *phGraph);
- /**
- * \brief Creates an empty node and adds it to a graph
- *
- * Creates a new node which performs no operation, and adds it to \p hGraph with
- * \p numDependencies dependencies specified via \p dependencies.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * An empty node performs no operation during execution, but can be used for
- * transitive ordering. For example, a phased execution graph with 2 groups of n
- * nodes with a barrier between them can be represented using an empty node and
- * 2*n dependency edges, rather than no empty node and n^2 dependency edges.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies);
- /**
- * \brief Creates an event record node and adds it to a graph
- *
- * Creates a new event record node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and event specified in \p event.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * Each launch of the graph will record \p event to capture execution of the
- * node's dependencies.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param event - Event for the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphAddEventWaitNode,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event);
- /**
- * \brief Returns the event associated with an event record node
- *
- * Returns the event of event record node \p hNode in \p event_out.
- *
- * \param hNode - Node to get the event for
- * \param event_out - Pointer to return the event
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphEventRecordNodeSetEvent,
- * ::cuGraphEventWaitNodeGetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent *event_out);
- /**
- * \brief Sets an event record node's event
- *
- * Sets the event of event record node \p hNode to \p event.
- *
- * \param hNode - Node to set the event for
- * \param event - Event to use
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphEventRecordNodeGetEvent,
- * ::cuGraphEventWaitNodeSetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event);
- /**
- * \brief Creates an event wait node and adds it to a graph
- *
- * Creates a new event wait node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and event specified in \p event.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * The graph node will wait for all work captured in \p event. See ::cuEventRecord()
- * for details on what is captured by an event. \p event may be from a different context
- * or device than the launch stream.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param event - Event for the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphAddEventRecordNode,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event);
- /**
- * \brief Returns the event associated with an event wait node
- *
- * Returns the event of event wait node \p hNode in \p event_out.
- *
- * \param hNode - Node to get the event for
- * \param event_out - Pointer to return the event
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphEventWaitNodeSetEvent,
- * ::cuGraphEventRecordNodeGetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent *event_out);
- /**
- * \brief Sets an event wait node's event
- *
- * Sets the event of event wait node \p hNode to \p event.
- *
- * \param hNode - Node to set the event for
- * \param event - Event to use
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphEventWaitNodeGetEvent,
- * ::cuGraphEventRecordNodeSetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent
- */
- CUresult CUDAAPI cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event);
- /**
- * \brief Creates an external semaphore signal node and adds it to a graph
- *
- * Creates a new external semaphore signal node and adds it to \p hGraph with
- * \p numDependencies dependencies specified via \p dependencies and arguments specified
- * in \p nodeParams. It is possible for \p numDependencies to be 0, in which case the
- * node will be placed at the root of the graph. \p dependencies may not have any
- * duplicate entries. A handle to the new node will be returned in \p phGraphNode.
- *
- * Performs a signal operation on a set of externally allocated semaphore objects
- * when the node is launched. The operation(s) will occur after all of the node's
- * dependencies have completed.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphExternalSemaphoresSignalNodeGetParams,
- * ::cuGraphExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuImportExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns an external semaphore signal node's parameters
- *
- * Returns the parameters of an external semaphore signal node \p hNode in \p params_out.
- * The \p extSemArray and \p paramsArray returned in \p params_out
- * are owned by the node. This memory remains valid until the node is destroyed or its
- * parameters are modified, and should not be modified
- * directly. Use ::cuGraphExternalSemaphoresSignalNodeSetParams to update the
- * parameters of this node.
- *
- * \param hNode - Node to get the parameters for
- * \param params_out - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuLaunchKernel,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuGraphExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out);
- /**
- * \brief Sets an external semaphore signal node's parameters
- *
- * Sets the parameters of an external semaphore signal node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuGraphExternalSemaphoresSignalNodeGetParams,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates an external semaphore wait node and adds it to a graph
- *
- * Creates a new external semaphore wait node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries. A handle
- * to the new node will be returned in \p phGraphNode.
- *
- * Performs a wait operation on a set of externally allocated semaphore objects
- * when the node is launched. The node's dependents will not be launched until
- * the wait operation has completed.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphExternalSemaphoresWaitNodeGetParams,
- * ::cuGraphExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuImportExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns an external semaphore wait node's parameters
- *
- * Returns the parameters of an external semaphore wait node \p hNode in \p params_out.
- * The \p extSemArray and \p paramsArray returned in \p params_out,
- * are owned by the node. This memory remains valid until the node is destroyed or its
- * parameters are modified, and should not be modified
- * directly. Use ::cuGraphExternalSemaphoresWaitNodeSetParams to update the
- * parameters of this node.
- *
- * \param hNode - Node to get the parameters for
- * \param params_out - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuLaunchKernel,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuGraphExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out);
- /**
- * \brief Sets an external semaphore wait node's parameters
- *
- * Sets the parameters of an external semaphore wait node \p hNode to \p nodeParams.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuGraphExternalSemaphoresWaitNodeGetParams,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync
- */
- CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates a batch memory operation node and adds it to a graph
- *
- * Creates a new batch memory operation node and adds it to \p hGraph with \p
- * numDependencies dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries.
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * When the node is added, the paramArray inside \p nodeParams is copied and therefore it can be
- * freed after the call returns.
- *
- * \note
- * Warning:
- * Improper use of this API may deadlock the application. Synchronization
- * ordering established through this API is not visible to CUDA. CUDA tasks
- * that are (even indirectly) ordered by this API should also have that order
- * expressed with CUDA-visible dependencies such as events. This ensures that
- * the scheduler does not serialize them in an improper order. For more
- * information, see the Stream Memory Operations section in the programming
- * guide (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html).
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuStreamBatchMemOp,
- * ::cuStreamWaitValue32,
- * ::cuStreamWriteValue32,
- * ::cuStreamWaitValue64,
- * ::cuStreamWriteValue64,
- * ::cuGraphBatchMemOpNodeGetParams,
- * ::cuGraphBatchMemOpNodeSetParams,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddBatchMemOpNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns a batch mem op node's parameters
- *
- * Returns the parameters of batch mem op node \p hNode in \p nodeParams_out.
- * The \p paramArray returned in \p nodeParams_out is owned by the node.
- * This memory remains valid until the node is destroyed or its
- * parameters are modified, and should not be modified
- * directly. Use ::cuGraphBatchMemOpNodeSetParams to update the
- * parameters of this node.
- *
- * \param hNode - Node to get the parameters for
- * \param nodeParams_out - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuStreamBatchMemOp,
- * ::cuGraphAddBatchMemOpNode,
- * ::cuGraphBatchMemOpNodeSetParams
- */
- CUresult CUDAAPI cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams_out);
- /**
- * \brief Sets a batch mem op node's parameters
- *
- * Sets the parameters of batch mem op node \p hNode to \p nodeParams.
- *
- * The paramArray inside \p nodeParams is copied and therefore it can be
- * freed after the call returns.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetParams,
- * ::cuStreamBatchMemOp,
- * ::cuGraphAddBatchMemOpNode,
- * ::cuGraphBatchMemOpNodeGetParams
- */
- CUresult CUDAAPI cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets the parameters for a batch mem op node in the given graphExec
- *
- * Sets the parameters of a batch mem op node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * The following fields on operations may be modified on an executable graph:
- *
- * op.waitValue.address
- * op.waitValue.value[64]
- * op.waitValue.flags bits corresponding to wait type (i.e. CU_STREAM_WAIT_VALUE_FLUSH bit cannot be modified)
- * op.writeValue.address
- * op.writeValue.value[64]
- *
- * Other fields, such as the context, count or type of operations, and other types of operations such as membars,
- * may not be modified.
- *
- * \p hNode must not have been removed from the original graph.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * The paramArray inside \p nodeParams is copied and therefore it can be
- * freed after the call returns.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Batch mem op node from the graph from which graphExec was instantiated
- * \param nodeParams - Updated Parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuStreamBatchMemOp,
- * ::cuGraphAddBatchMemOpNode,
- * ::cuGraphBatchMemOpNodeGetParams,
- * ::cuGraphBatchMemOpNodeSetParams,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
- /**
- * \brief Creates an allocation node and adds it to a graph
- *
- * Creates a new allocation node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries. A handle
- * to the new node will be returned in \p phGraphNode.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Parameters for the node
- *
- * When ::cuGraphAddMemAllocNode creates an allocation node, it returns the address of the allocation in
- * \p nodeParams.dptr. The allocation's address remains fixed across instantiations and launches.
- *
- * If the allocation is freed in the same graph, by creating a free node using ::cuGraphAddMemFreeNode,
- * the allocation can be accessed by nodes ordered after the allocation node but before the free node.
- * These allocations cannot be freed outside the owning graph, and they can only be freed once in the
- * owning graph.
- *
- * If the allocation is not freed in the same graph, then it can be accessed not only by nodes in the
- * graph which are ordered after the allocation node, but also by stream operations ordered after the
- * graph's execution but before the allocation is freed.
- *
- * Allocations which are not freed in the same graph can be freed by:
- * - passing the allocation to ::cuMemFreeAsync or ::cuMemFree;
- * - launching a graph with a free node for that allocation; or
- * - specifying ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH during instantiation, which makes
- * each launch behave as though it called ::cuMemFreeAsync for every unfreed allocation.
- *
- * It is not possible to free an allocation in both the owning graph and another graph. If the allocation
- * is freed in the same graph, a free node cannot be added to another graph. If the allocation is freed
- * in another graph, a free node can no longer be added to the owning graph.
- *
- * The following restrictions apply to graphs which contain allocation and/or memory free nodes:
- * - Nodes and edges of the graph cannot be deleted.
- * - The graph cannot be used in a child node.
- * - Only one instantiation of the graph may exist at any point in time.
- * - The graph cannot be cloned.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphAddMemFreeNode,
- * ::cuGraphMemAllocNodeGetParams,
- * ::cuDeviceGraphMemTrim,
- * ::cuDeviceGetGraphMemAttribute,
- * ::cuDeviceSetGraphMemAttribute,
- * ::cuMemAllocAsync,
- * ::cuMemFreeAsync,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddMemAllocNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams);
- /**
- * \brief Returns a memory alloc node's parameters
- *
- * Returns the parameters of a memory alloc node \p hNode in \p params_out.
- * The \p poolProps and \p accessDescs returned in \p params_out, are owned by the
- * node. This memory remains valid until the node is destroyed. The returned
- * parameters must not be modified.
- *
- * \param hNode - Node to get the parameters for
- * \param params_out - Pointer to return the parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddMemAllocNode,
- * ::cuGraphMemFreeNodeGetParams
- */
- CUresult CUDAAPI cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS *params_out);
- /**
- * \brief Creates a memory free node and adds it to a graph
- *
- * Creates a new memory free node and adds it to \p hGraph with \p numDependencies
- * dependencies specified via \p dependencies and arguments specified in \p nodeParams.
- * It is possible for \p numDependencies to be 0, in which case the node will be placed
- * at the root of the graph. \p dependencies may not have any duplicate entries. A handle
- * to the new node will be returned in \p phGraphNode.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param dptr - Address of memory to free
- *
- * ::cuGraphAddMemFreeNode will return ::CUDA_ERROR_INVALID_VALUE if the user attempts to free:
- * - an allocation twice in the same graph.
- * - an address that was not returned by an allocation node.
- * - an invalid address.
- *
- * The following restrictions apply to graphs which contain allocation and/or memory free nodes:
- * - Nodes and edges of the graph cannot be deleted.
- * - The graph cannot be used in a child node.
- * - Only one instantiation of the graph may exist at any point in time.
- * - The graph cannot be cloned.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphAddMemAllocNode,
- * ::cuGraphMemFreeNodeGetParams,
- * ::cuDeviceGraphMemTrim,
- * ::cuDeviceGetGraphMemAttribute,
- * ::cuDeviceSetGraphMemAttribute,
- * ::cuMemAllocAsync,
- * ::cuMemFreeAsync,
- * ::cuGraphCreate,
- * ::cuGraphDestroyNode,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUdeviceptr dptr);
- /**
- * \brief Returns a memory free node's parameters
- *
- * Returns the address of a memory free node \p hNode in \p dptr_out.
- *
- * \param hNode - Node to get the parameters for
- * \param dptr_out - Pointer to return the device address
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddMemFreeNode,
- * ::cuGraphMemAllocNodeGetParams
- */
- CUresult CUDAAPI cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr *dptr_out);
- /**
- * \brief Free unused memory that was cached on the specified device for use with graphs back to the OS.
- *
- * Blocks which are not in use by a graph that is either currently executing or scheduled to execute are
- * freed back to the operating system.
- *
- * \param device - The device for which cached memory should be freed.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_DEVICE
- *
- * \sa
- * ::cuGraphAddMemAllocNode,
- * ::cuGraphAddMemFreeNode,
- * ::cuDeviceSetGraphMemAttribute,
- * ::cuDeviceGetGraphMemAttribute
- */
- CUresult CUDAAPI cuDeviceGraphMemTrim(CUdevice device);
- /**
- * \brief Query asynchronous allocation attributes related to graphs
- *
- * Valid attributes are:
- *
- * - ::CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT: Amount of memory, in bytes, currently associated with graphs
- * - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the
- * last time it was reset. High watermark can only be reset to zero.
- * - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT: Amount of memory, in bytes, currently allocated for use by
- * the CUDA graphs asynchronous allocator.
- * - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by
- * the CUDA graphs asynchronous allocator.
- *
- * \param device - Specifies the scope of the query
- * \param attr - attribute to get
- * \param value - retrieved value
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_DEVICE
- *
- * \sa
- * ::cuDeviceSetGraphMemAttribute,
- * ::cuGraphAddMemAllocNode,
- * ::cuGraphAddMemFreeNode
- */
- CUresult CUDAAPI cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value);
- /**
- * \brief Set asynchronous allocation attributes related to graphs
- *
- * Valid attributes are:
- *
- * - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the
- * last time it was reset. High watermark can only be reset to zero.
- * - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by
- * the CUDA graphs asynchronous allocator.
- *
- * \param device - Specifies the scope of the query
- * \param attr - attribute to set
- * \param value - pointer to value to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_DEVICE
- *
- * \sa
- * ::cuDeviceGetGraphMemAttribute,
- * ::cuGraphAddMemAllocNode,
- * ::cuGraphAddMemFreeNode
- */
- CUresult CUDAAPI cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value);
- /**
- * \brief Clones a graph
- *
- * This function creates a copy of \p originalGraph and returns it in \p phGraphClone.
- * All parameters are copied into the cloned graph. The original graph may be modified
- * after this call without affecting the clone.
- *
- * Child graph nodes in the original graph are recursively copied into the clone.
- *
- * \param phGraphClone - Returns newly created cloned graph
- * \param originalGraph - Graph to clone
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphNodeFindInClone
- */
- CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph);
- /**
- * \brief Finds a cloned version of a node
- *
- * This function returns the node in \p hClonedGraph corresponding to \p hOriginalNode
- * in the original graph.
- *
- * \p hClonedGraph must have been cloned from the original graph via ::cuGraphClone.
- * \p hOriginalNode must have been in the original graph at the time of the call to
- * ::cuGraphClone, and the corresponding cloned node in \p hClonedGraph must not have
- * been removed. The cloned node is then returned via \p phNode.
- *
- * \param phNode - Returns handle to the cloned node
- * \param hOriginalNode - Handle to the original node
- * \param hClonedGraph - Cloned graph to query
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphClone
- */
- CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph);
- /**
- * \brief Returns a node's type
- *
- * Returns the node type of \p hNode in \p type.
- *
- * \param hNode - Node to query
- * \param type - Pointer to return the node type
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphChildGraphNodeGetGraph,
- * ::cuGraphKernelNodeGetParams,
- * ::cuGraphKernelNodeSetParams,
- * ::cuGraphHostNodeGetParams,
- * ::cuGraphHostNodeSetParams,
- * ::cuGraphMemcpyNodeGetParams,
- * ::cuGraphMemcpyNodeSetParams,
- * ::cuGraphMemsetNodeGetParams,
- * ::cuGraphMemsetNodeSetParams
- */
- CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type);
- /**
- * \brief Returns a graph's nodes
- *
- * Returns a list of \p hGraph's nodes. \p nodes may be NULL, in which case this
- * function will return the number of nodes in \p numNodes. Otherwise,
- * \p numNodes entries will be filled in. If \p numNodes is higher than the actual
- * number of nodes, the remaining entries in \p nodes will be set to NULL, and the
- * number of nodes actually obtained will be returned in \p numNodes.
- *
- * \param hGraph - Graph to query
- * \param nodes - Pointer to return the nodes
- * \param numNodes - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetType,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes);
- /**
- * \brief Returns a graph's root nodes
- *
- * Returns a list of \p hGraph's root nodes. \p rootNodes may be NULL, in which case this
- * function will return the number of root nodes in \p numRootNodes. Otherwise,
- * \p numRootNodes entries will be filled in. If \p numRootNodes is higher than the actual
- * number of root nodes, the remaining entries in \p rootNodes will be set to NULL, and the
- * number of nodes actually obtained will be returned in \p numRootNodes.
- *
- * \param hGraph - Graph to query
- * \param rootNodes - Pointer to return the root nodes
- * \param numRootNodes - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphGetNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetType,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, size_t *numRootNodes);
- /**
- * \brief Returns a graph's dependency edges
- *
- * Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding
- * indices in \p from and \p to; that is, the node in \p to[i] has a dependency on the
- * node in \p from[i]. \p from and \p to may both be NULL, in which
- * case this function only returns the number of edges in \p numEdges. Otherwise,
- * \p numEdges entries will be filled in. If \p numEdges is higher than the actual
- * number of edges, the remaining entries in \p from and \p to will be set to NULL, and
- * the number of edges actually returned will be written to \p numEdges.
- *
- * \param hGraph - Graph to get the edges from
- * \param from - Location to return edge endpoints
- * \param to - Location to return edge endpoints
- * \param numEdges - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, size_t *numEdges);
- /**
- * \brief Returns a graph's dependency edges (12.3+)
- *
- * Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding
- * indices in \p from, \p to and \p edgeData; that is, the node in \p to[i] has a
- * dependency on the node in \p from[i] with data \p edgeData[i]. \p from and \p to may
- * both be NULL, in which case this function only returns the number of edges in
- * \p numEdges. Otherwise, \p numEdges entries will be filled in. If \p numEdges is higher
- * than the actual number of edges, the remaining entries in \p from and \p to will be
- * set to NULL, and the number of edges actually returned will be written to \p numEdges.
- * \p edgeData may alone be NULL, in which case the edges must all have default (zeroed)
- * edge data. Attempting a lossy query via NULL \p edgeData will result in
- * ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL then \p from and \p to must be
- * as well.
- *
- * \param hGraph - Graph to get the edges from
- * \param from - Location to return edge endpoints
- * \param to - Location to return edge endpoints
- * \param edgeData - Optional location to return edge data
- * \param numEdges - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_LOSSY_QUERY,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, CUgraphEdgeData *edgeData, size_t *numEdges);
- /**
- * \brief Returns a node's dependencies
- *
- * Returns a list of \p hNode's dependencies. \p dependencies may be NULL, in which case this
- * function will return the number of dependencies in \p numDependencies. Otherwise,
- * \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual
- * number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the
- * number of nodes actually obtained will be returned in \p numDependencies.
- *
- * \param hNode - Node to query
- * \param dependencies - Pointer to return the dependencies
- * \param numDependencies - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeGetDependentNodes,
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies
- */
- CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies, size_t *numDependencies);
- /**
- * \brief Returns a node's dependencies (12.3+)
- *
- * Returns a list of \p hNode's dependencies. \p dependencies may be NULL, in which case this
- * function will return the number of dependencies in \p numDependencies. Otherwise,
- * \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual
- * number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the
- * number of nodes actually obtained will be returned in \p numDependencies.
- *
- * Note that if an edge has non-zero (non-default) edge data and \p edgeData is NULL,
- * this API will return ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL, then
- * \p dependencies must be as well.
- *
- * \param hNode - Node to query
- * \param dependencies - Pointer to return the dependencies
- * \param edgeData - Optional array to return edge data for each dependency
- * \param numDependencies - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_LOSSY_QUERY,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeGetDependentNodes,
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies
- */
- CUresult CUDAAPI cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode *dependencies, CUgraphEdgeData *edgeData, size_t *numDependencies);
- /**
- * \brief Returns a node's dependent nodes
- *
- * Returns a list of \p hNode's dependent nodes. \p dependentNodes may be NULL, in which
- * case this function will return the number of dependent nodes in \p numDependentNodes.
- * Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is
- * higher than the actual number of dependent nodes, the remaining entries in
- * \p dependentNodes will be set to NULL, and the number of nodes actually obtained will
- * be returned in \p numDependentNodes.
- *
- * \param hNode - Node to query
- * \param dependentNodes - Pointer to return the dependent nodes
- * \param numDependentNodes - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies
- */
- CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes, size_t *numDependentNodes);
- /**
- * \brief Returns a node's dependent nodes (12.3+)
- *
- * Returns a list of \p hNode's dependent nodes. \p dependentNodes may be NULL, in which
- * case this function will return the number of dependent nodes in \p numDependentNodes.
- * Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is
- * higher than the actual number of dependent nodes, the remaining entries in
- * \p dependentNodes will be set to NULL, and the number of nodes actually obtained will
- * be returned in \p numDependentNodes.
- *
- * Note that if an edge has non-zero (non-default) edge data and \p edgeData is NULL,
- * this API will return ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL, then
- * \p dependentNodes must be as well.
- *
- * \param hNode - Node to query
- * \param dependentNodes - Pointer to return the dependent nodes
- * \param edgeData - Optional pointer to return edge data for dependent nodes
- * \param numDependentNodes - See description
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_LOSSY_QUERY,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphGetNodes,
- * ::cuGraphGetRootNodes,
- * ::cuGraphGetEdges,
- * ::cuGraphAddDependencies,
- * ::cuGraphRemoveDependencies
- */
- CUresult CUDAAPI cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode *dependentNodes, CUgraphEdgeData *edgeData, size_t *numDependentNodes);
- /**
- * \brief Adds dependency edges to a graph
- *
- * The number of dependencies to be added is defined by \p numDependencies.
- * Elements in \p from and \p to at corresponding indices define a dependency.
- * Each node in \p from and \p to must belong to \p hGraph.
- *
- * If \p numDependencies is 0, elements in \p from and \p to will be ignored.
- * Specifying an existing dependency will return an error.
- *
- * \param hGraph - Graph to which dependencies are added
- * \param from - Array of nodes that provide the dependencies
- * \param to - Array of dependent nodes
- * \param numDependencies - Number of dependencies to be added
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphRemoveDependencies,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies);
- /**
- * \brief Adds dependency edges to a graph (12.3+)
- *
- * The number of dependencies to be added is defined by \p numDependencies.
- * Elements in \p from and \p to at corresponding indices define a dependency.
- * Each node in \p from and \p to must belong to \p hGraph.
- *
- * If \p numDependencies is 0, elements in \p from and \p to will be ignored.
- * Specifying an existing dependency will return an error.
- *
- * \param hGraph - Graph to which dependencies are added
- * \param from - Array of nodes that provide the dependencies
- * \param to - Array of dependent nodes
- * \param edgeData - Optional array of edge data. If NULL, default (zeroed) edge data is assumed.
- * \param numDependencies - Number of dependencies to be added
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphRemoveDependencies,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, const CUgraphEdgeData *edgeData, size_t numDependencies);
- /**
- * \brief Removes dependency edges from a graph
- *
- * The number of dependencies to be removed is defined by \p numDependencies.
- * Elements in \p from and \p to at corresponding indices define a dependency.
- * Each node in \p from and \p to must belong to \p hGraph.
- *
- * If \p numDependencies is 0, elements in \p from and \p to will be ignored.
- * Specifying a non-existing dependency will return an error.
- *
- * Dependencies cannot be removed from graphs which contain allocation or free nodes.
- * Any attempt to do so will return an error.
- *
- * \param hGraph - Graph from which to remove dependencies
- * \param from - Array of nodes that provide the dependencies
- * \param to - Array of dependent nodes
- * \param numDependencies - Number of dependencies to be removed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddDependencies,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies);
- /**
- * \brief Removes dependency edges from a graph (12.3+)
- *
- * The number of dependencies to be removed is defined by \p numDependencies.
- * Elements in \p from and \p to at corresponding indices define a dependency.
- * Each node in \p from and \p to must belong to \p hGraph.
- *
- * If \p numDependencies is 0, elements in \p from and \p to will be ignored.
- * Specifying an edge that does not exist in the graph, with data matching
- * \p edgeData, results in an error. \p edgeData is nullable, which is equivalent
- * to passing default (zeroed) data for each edge.
- *
- * Dependencies cannot be removed from graphs which contain allocation or free nodes.
- * Any attempt to do so will return an error.
- *
- * \param hGraph - Graph from which to remove dependencies
- * \param from - Array of nodes that provide the dependencies
- * \param to - Array of dependent nodes
- * \param edgeData - Optional array of edge data. If NULL, edge data is assumed to
- * be default (zeroed).
- * \param numDependencies - Number of dependencies to be removed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddDependencies,
- * ::cuGraphGetEdges,
- * ::cuGraphNodeGetDependencies,
- * ::cuGraphNodeGetDependentNodes
- */
- CUresult CUDAAPI cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, const CUgraphEdgeData *edgeData, size_t numDependencies);
- /**
- * \brief Remove a node from the graph
- *
- * Removes \p hNode from its graph. This operation also severs any dependencies of other nodes
- * on \p hNode and vice versa.
- *
- * Nodes which belong to a graph which contains allocation or free nodes cannot be destroyed.
- * Any attempt to do so will return an error.
- *
- * \param hNode - Node to remove
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphAddEmptyNode,
- * ::cuGraphAddKernelNode,
- * ::cuGraphAddHostNode,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphAddMemsetNode
- */
- CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode);
- /**
- * \brief Creates an executable graph from a graph
- *
- * Instantiates \p hGraph as an executable graph. The graph is validated for any
- * structural constraints or intra-node constraints which were not previously
- * validated. If instantiation is successful, a handle to the instantiated graph
- * is returned in \p phGraphExec.
- *
- * The \p flags parameter controls the behavior of instantiation and subsequent
- * graph launches. Valid flags are:
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, which configures a
- * graph containing memory allocation nodes to automatically free any
- * unfreed memory allocations before the graph is relaunched.
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH, which configures the graph for launch
- * from the device. If this flag is passed, the executable graph handle returned can be
- * used to launch the graph from both the host and device. This flag can only be used
- * on platforms which support unified addressing. This flag cannot be used in
- * conjunction with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH.
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, which causes the graph
- * to use the priorities from the per-node attributes rather than the priority
- * of the launch stream during execution. Note that priorities are only available
- * on kernel nodes, and are copied from stream priority during stream capture.
- *
- * If \p hGraph contains any allocation or free nodes, there can be at most one
- * executable graph in existence for that graph at a time. An attempt to instantiate
- * a second executable graph before destroying the first with ::cuGraphExecDestroy
- * will result in an error.
- * The same also applies if \p hGraph contains any device-updatable kernel nodes.
- *
- * If \p hGraph contains kernels which call device-side cudaGraphLaunch() from multiple
- * contexts, this will result in an error.
- *
- * Graphs instantiated for launch on the device have additional restrictions which do not
- * apply to host graphs:
- *
- * - The graph's nodes must reside on a single context.
- * - The graph can only contain kernel nodes, memcpy nodes, memset nodes, and child graph nodes.
- * - The graph cannot be empty and must contain at least one kernel, memcpy, or memset node.
- * Operation-specific restrictions are outlined below.
- * - Kernel nodes:
- * - Use of CUDA Dynamic Parallelism is not permitted.
- * - Cooperative launches are permitted as long as MPS is not in use.
- * - Memcpy nodes:
- * - Only copies involving device memory and/or pinned device-mapped host memory are permitted.
- * - Copies involving CUDA arrays are not permitted.
- * - Both operands must be accessible from the current context, and the current context must
- * match the context of other nodes in the graph.
- *
- * \param phGraphExec - Returns instantiated graph
- * \param hGraph - Graph to instantiate
- * \param flags - Flags to control instantiation. See ::CUgraphInstantiate_flags.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiateWithParams,
- * ::cuGraphCreate,
- * ::cuGraphUpload,
- * ::cuGraphLaunch,
- * ::cuGraphExecDestroy
- */
- CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, unsigned long long flags);
- /**
- * \brief Creates an executable graph from a graph
- *
- * Instantiates \p hGraph as an executable graph according to the \p instantiateParams structure.
- * The graph is validated for any structural constraints or intra-node constraints
- * which were not previously validated. If instantiation is successful, a handle to
- * the instantiated graph is returned in \p phGraphExec.
- *
- * \p instantiateParams controls the behavior of instantiation and subsequent
- * graph launches, as well as returning more detailed information in the event of an error.
- * ::CUDA_GRAPH_INSTANTIATE_PARAMS is defined as:
- *
- * \code
- typedef struct {
- cuuint64_t flags;
- CUstream hUploadStream;
- CUgraphNode hErrNode_out;
- CUgraphInstantiateResult result_out;
- } CUDA_GRAPH_INSTANTIATE_PARAMS;
- * \endcode
- *
- * The \p flags field controls the behavior of instantiation and subsequent
- * graph launches. Valid flags are:
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, which configures a
- * graph containing memory allocation nodes to automatically free any
- * unfreed memory allocations before the graph is relaunched.
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD, which will perform an upload of the graph
- * into \p hUploadStream once the graph has been instantiated.
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH, which configures the graph for launch
- * from the device. If this flag is passed, the executable graph handle returned can be
- * used to launch the graph from both the host and device. This flag can only be used
- * on platforms which support unified addressing. This flag cannot be used in
- * conjunction with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH.
- *
- * - ::CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, which causes the graph
- * to use the priorities from the per-node attributes rather than the priority
- * of the launch stream during execution. Note that priorities are only available
- * on kernel nodes, and are copied from stream priority during stream capture.
- *
- * If \p hGraph contains any allocation or free nodes, there can be at most one
- * executable graph in existence for that graph at a time. An attempt to instantiate a
- * second executable graph before destroying the first with ::cuGraphExecDestroy will
- * result in an error.
- * The same also applies if \p hGraph contains any device-updatable kernel nodes.
- *
- * If \p hGraph contains kernels which call device-side cudaGraphLaunch() from multiple
- * contexts, this will result in an error.
- *
- * Graphs instantiated for launch on the device have additional restrictions which do not
- * apply to host graphs:
- *
- * - The graph's nodes must reside on a single context.
- * - The graph can only contain kernel nodes, memcpy nodes, memset nodes, and child graph nodes.
- * - The graph cannot be empty and must contain at least one kernel, memcpy, or memset node.
- * Operation-specific restrictions are outlined below.
- * - Kernel nodes:
- * - Use of CUDA Dynamic Parallelism is not permitted.
- * - Cooperative launches are permitted as long as MPS is not in use.
- * - Memcpy nodes:
- * - Only copies involving device memory and/or pinned device-mapped host memory are permitted.
- * - Copies involving CUDA arrays are not permitted.
- * - Both operands must be accessible from the current context, and the current context must
- * match the context of other nodes in the graph.
- *
- * In the event of an error, the \p result_out and \p hErrNode_out fields will contain more
- * information about the nature of the error. Possible error reporting includes:
- *
- * - ::CUDA_GRAPH_INSTANTIATE_ERROR, if passed an invalid value or if an unexpected error occurred
- * which is described by the return value of the function. \p hErrNode_out will be set to NULL.
- * - ::CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE, if the graph structure is invalid. \p hErrNode_out
- * will be set to one of the offending nodes.
- * - ::CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED, if the graph is instantiated for device
- * launch but contains a node of an unsupported node type, or a node which performs unsupported
- * operations, such as use of CUDA dynamic parallelism within a kernel node. \p hErrNode_out will
- * be set to this node.
- * - ::CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED, if the graph is instantiated for device
- * launch but a node's context differs from that of another node. This error can also be returned
- * if a graph is not instantiated for device launch and it contains kernels which call device-side
- * cudaGraphLaunch() from multiple contexts. \p hErrNode_out will be set to this node.
- *
- * If instantiation is successful, \p result_out will be set to ::CUDA_GRAPH_INSTANTIATE_SUCCESS,
- * and \p hErrNode_out will be set to NULL.
- *
- * \param phGraphExec - Returns instantiated graph
- * \param hGraph - Graph to instantiate
- * \param instantiateParams - Instantiation parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphInstantiate,
- * ::cuGraphExecDestroy
- */
- CUresult CUDAAPI cuGraphInstantiateWithParams(CUgraphExec *phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS *instantiateParams);
- /**
- * \brief Query the instantiation flags of an executable graph
- *
- * Returns the flags that were passed to instantiation for the given executable graph.
- * ::CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD will not be returned by this API as it does
- * not affect the resulting executable graph.
- *
- * \param hGraphExec - The executable graph to query
- * \param flags - Returns the instantiation flags
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiate,
- * ::cuGraphInstantiateWithParams
- */
- CUresult CUDAAPI cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t *flags);
- /**
- * \brief Sets the parameters for a kernel node in the given graphExec
- *
- * Sets the parameters of a kernel node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * \p hNode must not have been removed from the original graph. All \p nodeParams
- * fields may change, but the following restrictions apply to \p func updates:
- *
- * - The owning context of the function cannot change.
- * - A node whose function originally did not use CUDA dynamic parallelism cannot be updated
- * to a function which uses CDP.
- * - A node whose function originally did not make device-side update calls cannot be updated
- * to a function which makes device-side update calls.
- * - If \p hGraphExec was not instantiated for device launch, a node whose function originally
- * did not use device-side cudaGraphLaunch() cannot be updated to a function which uses
- * device-side cudaGraphLaunch() unless the node resides on the same context as nodes which
- * contained such calls at instantiate-time. If no such calls were present at instantiation,
- * these updates cannot be performed at all.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * If \p hNode is a device-updatable kernel node, the next upload/launch of \p hGraphExec
- * will overwrite any previous device-side updates. Additionally, applying host updates to a
- * device-updatable kernel node while it is being updated from the device will result in
- * undefined behavior.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - kernel node from the graph from which graphExec was instantiated
- * \param nodeParams - Updated Parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddKernelNode,
- * ::cuGraphKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets the parameters for a memcpy node in the given graphExec.
- *
- * Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had
- * contained \p copyParams at instantiation. hNode must remain in the graph which was
- * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
- *
- * The source and destination memory in \p copyParams must be allocated from the same
- * contexts as the original source and destination memory. Both the instantiation-time
- * memory operands and the memory operands in \p copyParams must be 1-dimensional.
- * Zero-length operations are not supported.
- *
- * The modifications only affect future launches of \p hGraphExec. Already enqueued
- * or running launches of \p hGraphExec are not affected by this call. hNode is also
- * not modified by this call.
- *
- * Returns CUDA_ERROR_INVALID_VALUE if the memory operands' mappings changed or
- * either the original or new memory operands are multidimensional.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Memcpy node from the graph which was used to instantiate graphExec
- * \param copyParams - The updated parameters to set
- * \param ctx - Context on which to run the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddMemcpyNode,
- * ::cuGraphMemcpyNodeSetParams,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D *copyParams, CUcontext ctx);
- /**
- * \brief Sets the parameters for a memset node in the given graphExec.
- *
- * Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had
- * contained \p memsetParams at instantiation. hNode must remain in the graph which was
- * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
- *
- * Zero-sized operations are not supported.
- *
- * The new destination pointer in memsetParams must be to the same kind of allocation
- * as the original destination pointer and have the same context association and device mapping
- * as the original destination pointer.
- *
- * Both the value and pointer address may be updated.
- * Changing other aspects of the memset (width, height, element size or pitch) may cause the update to be rejected.
- * Specifically, for 2d memsets, all dimension changes are rejected.
- * For 1d memsets, changes in height are explicitly rejected and other changes are opportunistically allowed
- * if the resulting work maps onto the work resources already allocated for the node.
- *
- * The modifications only affect future launches of \p hGraphExec. Already enqueued
- * or running launches of \p hGraphExec are not affected by this call. hNode is also
- * not modified by this call.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Memset node from the graph which was used to instantiate graphExec
- * \param memsetParams - The updated parameters to set
- * \param ctx - Context on which to run the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddMemsetNode,
- * ::cuGraphMemsetNodeSetParams,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx);
- /**
- * \brief Sets the parameters for a host node in the given graphExec.
- *
- * Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had
- * contained \p nodeParams at instantiation. hNode must remain in the graph which was
- * used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored.
- *
- * The modifications only affect future launches of \p hGraphExec. Already enqueued
- * or running launches of \p hGraphExec are not affected by this call. hNode is also
- * not modified by this call.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Host node from the graph which was used to instantiate graphExec
- * \param nodeParams - The updated parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddHostNode,
- * ::cuGraphHostNodeSetParams,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams);
- /**
- * \brief Updates node parameters in the child graph node in the given graphExec.
- *
- * Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained
- * in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation.
- * \p hNode must remain in the graph which was used to instantiate \p hGraphExec.
- * Changed edges to and from \p hNode are ignored.
- *
- * The modifications only affect future launches of \p hGraphExec. Already enqueued
- * or running launches of \p hGraphExec are not affected by this call. \p hNode is also
- * not modified by this call.
- *
- * The topology of \p childGraph, as well as the node insertion order, must match that
- * of the graph contained in \p hNode. See ::cuGraphExecUpdate() for a list of restrictions
- * on what can be updated in an instantiated graph. The update is recursive, so child graph
- * nodes contained within the top level child graph will also be updated.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Child graph node from the graph which was used to instantiate graphExec
- * \param childGraph - The graph supplying the updated parameters
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddChildGraphNode,
- * ::cuGraphChildGraphNodeGetGraph,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
- /**
- * \brief Sets the event for an event record node in the given graphExec
- *
- * Sets the event of an event record node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - event record node from the graph from which graphExec was instantiated
- * \param event - Updated event to use
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddEventRecordNode,
- * ::cuGraphEventRecordNodeGetEvent,
- * ::cuGraphEventWaitNodeSetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
- /**
- * \brief Sets the event for an event wait node in the given graphExec
- *
- * Sets the event of an event wait node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - event wait node from the graph from which graphExec was instantiated
- * \param event - Updated event to use
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddEventWaitNode,
- * ::cuGraphEventWaitNodeGetEvent,
- * ::cuGraphEventRecordNodeSetEvent,
- * ::cuEventRecordWithFlags,
- * ::cuStreamWaitEvent,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
- /**
- * \brief Sets the parameters for an external semaphore signal node in the given graphExec
- *
- * Sets the parameters of an external semaphore signal node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * \p hNode must not have been removed from the original graph.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * Changing \p nodeParams->numExtSems is not supported.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - semaphore signal node from the graph from which graphExec was instantiated
- * \param nodeParams - Updated Parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddExternalSemaphoresSignalNode,
- * ::cuImportExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresWaitNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
- /**
- * \brief Sets the parameters for an external semaphore wait node in the given graphExec
- *
- * Sets the parameters of an external semaphore wait node in an executable graph \p hGraphExec.
- * The node is identified by the corresponding node \p hNode in the
- * non-executable graph, from which the executable graph was instantiated.
- *
- * \p hNode must not have been removed from the original graph.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * Changing \p nodeParams->numExtSems is not supported.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - semaphore wait node from the graph from which graphExec was instantiated
- * \param nodeParams - Updated Parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphExecNodeSetParams,
- * ::cuGraphAddExternalSemaphoresWaitNode,
- * ::cuImportExternalSemaphore,
- * ::cuSignalExternalSemaphoresAsync,
- * ::cuWaitExternalSemaphoresAsync,
- * ::cuGraphExecKernelNodeSetParams,
- * ::cuGraphExecMemcpyNodeSetParams,
- * ::cuGraphExecMemsetNodeSetParams,
- * ::cuGraphExecHostNodeSetParams,
- * ::cuGraphExecChildGraphNodeSetParams,
- * ::cuGraphExecEventRecordNodeSetEvent,
- * ::cuGraphExecEventWaitNodeSetEvent,
- * ::cuGraphExecExternalSemaphoresSignalNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
- /**
- * \brief Enables or disables the specified node in the given graphExec
- *
- * Sets \p hNode to be either enabled or disabled. Disabled nodes are functionally equivalent
- * to empty nodes until they are reenabled. Existing node parameters are not affected by
- * disabling/enabling the node.
- *
- * The node is identified by the corresponding node \p hNode in the non-executable
- * graph, from which the executable graph was instantiated.
- *
- * \p hNode must not have been removed from the original graph.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * If \p hNode is a device-updatable kernel node, the next upload/launch of \p hGraphExec
- * will overwrite any previous device-side updates. Additionally, applying host updates to a
- * device-updatable kernel node while it is being updated from the device will result in
- * undefined behavior.
- *
- * \note Currently only kernel, memset and memcpy nodes are supported.
- *
- * \param hGraphExec - The executable graph in which to set the specified node
- * \param hNode - Node from the graph from which graphExec was instantiated
- * \param isEnabled - Node is enabled if != 0, otherwise the node is disabled
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeGetEnabled,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate,
- * ::cuGraphLaunch
- */
- CUresult CUDAAPI cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled);
- /**
- * \brief Query whether a node in the given graphExec is enabled
- *
- * Sets \p *isEnabled to 1 if \p hNode is enabled, or 0 if \p hNode is disabled.
- *
- * The node is identified by the corresponding node \p hNode in the non-executable
- * graph, from which the executable graph was instantiated.
- *
- * \p hNode must not have been removed from the original graph.
- *
- * \note Currently only kernel, memset and memcpy nodes are supported.
- * \note This function will not reflect device-side updates for device-updatable kernel nodes.
- *
- * \param hGraphExec - The executable graph in which to query the specified node
- * \param hNode - Node from the graph from which graphExec was instantiated
- * \param isEnabled - Location to return the enabled status of the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphNodeSetEnabled,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate,
- * ::cuGraphLaunch
- */
- CUresult CUDAAPI cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int *isEnabled);
- /**
- * \brief Uploads an executable graph in a stream
- *
- * Uploads \p hGraphExec to the device in \p hStream without executing it. Uploads of
- * the same \p hGraphExec will be serialized. Each upload is ordered behind both any
- * previous work in \p hStream and any previous launches of \p hGraphExec.
- * Uses memory cached by \p hStream to back the allocations owned by \p hGraphExec.
- *
- * \param hGraphExec - Executable graph to upload
- * \param hStream - Stream in which to upload the graph
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiate,
- * ::cuGraphLaunch,
- * ::cuGraphExecDestroy
- */
- CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream);
- /**
- * \brief Launches an executable graph in a stream
- *
- * Executes \p hGraphExec in \p hStream. Only one instance of \p hGraphExec may be executing
- * at a time. Each launch is ordered behind both any previous work in \p hStream
- * and any previous launches of \p hGraphExec. To execute a graph concurrently, it must be
- * instantiated multiple times into multiple executable graphs.
- *
- * If any allocations created by \p hGraphExec remain unfreed (from a previous launch) and
- * \p hGraphExec was not instantiated with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH,
- * the launch will fail with ::CUDA_ERROR_INVALID_VALUE.
- *
- * \param hGraphExec - Executable graph to launch
- * \param hStream - Stream in which to launch the graph
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiate,
- * ::cuGraphUpload,
- * ::cuGraphExecDestroy
- */
- CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream);
- /**
- * \brief Destroys an executable graph
- *
- * Destroys the executable graph specified by \p hGraphExec, as well
- * as all of its executable nodes. If the executable graph is
- * in-flight, it will not be terminated, but rather freed
- * asynchronously on completion.
- *
- * \param hGraphExec - Executable graph to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiate,
- * ::cuGraphUpload,
- * ::cuGraphLaunch
- */
- CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec);
- /**
- * \brief Destroys a graph
- *
- * Destroys the graph specified by \p hGraph, as well as all of its nodes.
- *
- * \param hGraph - Graph to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph);
- /**
- * \brief Check whether an executable graph can be updated with a graph and perform the update if possible
- *
- * Updates the node parameters in the instantiated graph specified by \p hGraphExec with the
- * node parameters in a topologically identical graph specified by \p hGraph.
- *
- * Limitations:
- *
- * - Kernel nodes:
- * - The owning context of the function cannot change.
- * - A node whose function originally did not use CUDA dynamic parallelism cannot be updated
- * to a function which uses CDP.
- * - A node whose function originally did not make device-side update calls cannot be updated
- * to a function which makes device-side update calls.
- * - A cooperative node cannot be updated to a non-cooperative node, and vice-versa.
- * - If the graph was instantiated with CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, the
- * priority attribute cannot change. Equality is checked on the originally requested
- * priority values, before they are clamped to the device's supported range.
- * - If \p hGraphExec was not instantiated for device launch, a node whose function originally
- * did not use device-side cudaGraphLaunch() cannot be updated to a function which uses
- * device-side cudaGraphLaunch() unless the node resides on the same context as nodes which
- * contained such calls at instantiate-time. If no such calls were present at instantiation,
- * these updates cannot be performed at all.
- * - Neither \p hGraph nor \p hGraphExec may contain device-updatable kernel nodes.
- * - Memset and memcpy nodes:
- * - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.
- * - The source/destination memory must be allocated from the same contexts as the original
- * source/destination memory.
- * - For 2d memsets, only address and assigned value may be updated.
- * - For 1d memsets, updating dimensions is also allowed, but may fail if the resulting operation doesn't
- * map onto the work resources already allocated for the node.
- * - Additional memcpy node restrictions:
- * - Changing either the source or destination memory type (e.g. CU_MEMORYTYPE_DEVICE,
- * CU_MEMORYTYPE_ARRAY, etc.) is not supported.
- * - External semaphore wait nodes and signal nodes:
- * - Changing the number of semaphores is not supported.
- * - Conditional nodes:
- * - Changing node parameters is not supported.
- * - Changing parameters of nodes within the conditional body graph is subject to the rules above.
- * - Conditional handle flags and default values are updated as part of the graph update.
- *
- * Note: The API may add further restrictions in future releases. The return code should always be checked.
- *
- * cuGraphExecUpdate sets the result member of \p resultInfo to CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED
- * under the following conditions:
- * - The count of nodes directly in \p hGraphExec and \p hGraph differ, in which case resultInfo->errorNode
- * is set to NULL.
- * - \p hGraph has more exit nodes than \p hGraphExec, in which case resultInfo->errorNode is set to one of
- * the exit nodes in hGraph.
- * - A node in \p hGraph has a different number of dependencies than the node from \p hGraphExec it is paired with,
- * in which case resultInfo->errorNode is set to the node from \p hGraph.
- * - A node in \p hGraph has a dependency that does not match with the corresponding dependency of the paired node
- * from \p hGraphExec. resultInfo->errorNode will be set to the node from \p hGraph. resultInfo->errorFromNode
- * will be set to the mismatched dependency. The dependencies are paired based on edge order and a dependency
- * does not match when the nodes are already paired based on other edges examined in the graph.
- *
- * cuGraphExecUpdate sets the result member of \p resultInfo to:
- * - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value.
- * - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology changed
- * - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node changed, in which case
- * resultInfo->errorNode is set to the node from \p hGraph.
- * - CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE if the function changed in an unsupported
- * way (see note above), in which case resultInfo->errorNode is set to the node from \p hGraph.
- * - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a node changed in a way
- * that is not supported, in which case resultInfo->errorNode is set to the node from \p hGraph.
- * - CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED if any attributes of a node changed in a way
- * that is not supported, in which case resultInfo->errorNode is set to the node from \p hGraph.
- * - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is unsupported, like
- * the node's type or configuration, in which case resultInfo->errorNode is set to the node from \p hGraph.
- *
- * If the update fails for a reason not listed above, the result member of \p resultInfo will be set
- * to CU_GRAPH_EXEC_UPDATE_ERROR. If the update succeeds, the result member will be set to CU_GRAPH_EXEC_UPDATE_SUCCESS.
- *
- * cuGraphExecUpdate returns CUDA_SUCCESS when the update was performed successfully. It returns
- * CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the graph update was not performed because it included
- * changes which violated constraints specific to instantiated graph update.
- *
- * \param hGraphExec The instantiated graph to be updated
- * \param hGraph The graph containing the updated parameters
- * \param resultInfo the error info structure
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE,
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo *resultInfo);
- /**
- * \brief Copies attributes from source node to destination node.
- *
- * Copies attributes from source node \p src to destination node \p dst.
- * Both nodes must have the same context.
- *
- * \param[out] dst Destination node
- * \param[in] src Source node
- * For list of attributes see ::CUkernelNodeAttrID
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src);
- /**
- * \brief Queries node attribute.
- *
- * Queries attribute \p attr from node \p hNode and stores it in corresponding
- * member of \p value_out.
- *
- * \param[in] hNode
- * \param[in] attr
- * \param[out] value_out
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr,
- CUkernelNodeAttrValue *value_out);
-
- /**
- * \brief Sets node attribute.
- *
- * Sets attribute \p attr on node \p hNode from corresponding attribute of
- * \p value.
- *
- * \param[out] hNode
- * \param[in] attr
- * \param[in] value
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa
- * ::CUaccessPolicyWindow
- */
- CUresult CUDAAPI cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr,
- const CUkernelNodeAttrValue *value);
- /**
- * \brief Write a DOT file describing graph structure
- *
- * Using the provided \p hGraph, write to \p path a DOT formatted description of the graph.
- * By default this includes the graph topology, node types, node id, kernel names and memcpy direction.
- * \p flags can be specified to write more detailed information about each node type such as
- * parameter values, kernel attributes, node and function handles.
- *
- * \param hGraph - The graph to create a DOT file from
- * \param path - The path to write the DOT file to
- * \param flags - Flags from CUgraphDebugDot_flags for specifying which additional node information to write
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OPERATING_SYSTEM
- */
- CUresult CUDAAPI cuGraphDebugDotPrint(CUgraph hGraph, const char *path, unsigned int flags);
- /**
- * \brief Create a user object
- *
- * Create a user object with the specified destructor callback and initial reference count. The
- * initial references are owned by the caller.
- *
- * Destructor callbacks cannot make CUDA API calls and should avoid blocking behavior, as they
- * are executed by a shared internal thread. Another thread may be signaled to perform such
- * actions, if it does not block forward progress of tasks scheduled through CUDA.
- *
- * See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
- *
- * \param object_out - Location to return the user object handle
- * \param ptr - The pointer to pass to the destroy function
- * \param destroy - Callback to free the user object when it is no longer in use
- * \param initialRefcount - The initial refcount to create the object with, typically 1. The
- * initial references are owned by the calling thread.
- * \param flags - Currently it is required to pass ::CU_USER_OBJECT_NO_DESTRUCTOR_SYNC,
- * which is the only defined flag. This indicates that the destroy
- * callback cannot be waited on by any CUDA API. Users requiring
- * synchronization of the callback should signal its completion
- * manually.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuUserObjectRetain,
- * ::cuUserObjectRelease,
- * ::cuGraphRetainUserObject,
- * ::cuGraphReleaseUserObject,
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuUserObjectCreate(CUuserObject *object_out, void *ptr, CUhostFn destroy,
- unsigned int initialRefcount, unsigned int flags);
- /**
- * \brief Retain a reference to a user object
- *
- * Retains new references to a user object. The new references are owned by the caller.
- *
- * See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
- *
- * \param object - The object to retain
- * \param count - The number of references to retain, typically 1. Must be nonzero
- * and not larger than INT_MAX.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuUserObjectCreate,
- * ::cuUserObjectRelease,
- * ::cuGraphRetainUserObject,
- * ::cuGraphReleaseUserObject,
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuUserObjectRetain(CUuserObject object, unsigned int count);
- /**
- * \brief Release a reference to a user object
- *
- * Releases user object references owned by the caller. The object's destructor is invoked if
- * the reference count reaches zero.
- *
- * It is undefined behavior to release references not owned by the caller, or to use a user
- * object handle after all references are released.
- *
- * See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
- *
- * \param object - The object to release
- * \param count - The number of references to release, typically 1. Must be nonzero
- * and not larger than INT_MAX.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuUserObjectCreate,
- * ::cuUserObjectRetain,
- * ::cuGraphRetainUserObject,
- * ::cuGraphReleaseUserObject,
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuUserObjectRelease(CUuserObject object, unsigned int count);
- /**
- * \brief Retain a reference to a user object from a graph
- *
- * Creates or moves user object references that will be owned by a CUDA graph.
- *
- * See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
- *
- * \param graph - The graph to associate the reference with
- * \param object - The user object to retain a reference for
- * \param count - The number of references to add to the graph, typically 1. Must be
- * nonzero and not larger than INT_MAX.
- * \param flags - The optional flag ::CU_GRAPH_USER_OBJECT_MOVE transfers references
- * from the calling thread, rather than create new references. Pass 0
- * to create new references.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuUserObjectCreate,
- * ::cuUserObjectRetain,
- * ::cuUserObjectRelease,
- * ::cuGraphReleaseUserObject,
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags);
- /**
- * \brief Release a user object reference from a graph
- *
- * Releases user object references owned by a graph.
- *
- * See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects.
- *
- * \param graph - The graph that will release the reference
- * \param object - The user object to release a reference for
- * \param count - The number of references to release, typically 1. Must be nonzero
- * and not larger than INT_MAX.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuUserObjectCreate,
- * ::cuUserObjectRetain,
- * ::cuUserObjectRelease,
- * ::cuGraphRetainUserObject,
- * ::cuGraphCreate
- */
- CUresult CUDAAPI cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count);
- /**
- * \brief Adds a node of arbitrary type to a graph
- *
- * Creates a new node in \p hGraph described by \p nodeParams with \p numDependencies
- * dependencies specified via \p dependencies. \p numDependencies may be 0.
- * \p dependencies may be null if \p numDependencies is 0. \p dependencies may not have
- * any duplicate entries.
- *
- * \p nodeParams is a tagged union. The node type should be specified in the \p type field,
- * and type-specific parameters in the corresponding union member. All unused bytes - that
- * is, \p reserved0 and all bytes past the utilized union member - must be set to zero.
- * It is recommended to use brace initialization or memset to ensure all bytes are
- * initialized.
- *
- * Note that for some node types, \p nodeParams may contain "out parameters" which are
- * modified during the call, such as \p nodeParams->alloc.dptr.
- *
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Specification of the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphNodeSetParams,
- * ::cuGraphExecNodeSetParams
- */
- CUresult CUDAAPI cuGraphAddNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUgraphNodeParams *nodeParams);
- /**
- * \brief Adds a node of arbitrary type to a graph (12.3+)
- *
- * Creates a new node in \p hGraph described by \p nodeParams with \p numDependencies
- * dependencies specified via \p dependencies. \p numDependencies may be 0.
- * \p dependencies may be null if \p numDependencies is 0. \p dependencies may not have
- * any duplicate entries.
- *
- * \p nodeParams is a tagged union. The node type should be specified in the \p type field,
- * and type-specific parameters in the corresponding union member. All unused bytes - that
- * is, \p reserved0 and all bytes past the utilized union member - must be set to zero.
- * It is recommended to use brace initialization or memset to ensure all bytes are
- * initialized.
- *
- * Note that for some node types, \p nodeParams may contain "out parameters" which are
- * modified during the call, such as \p nodeParams->alloc.dptr.
- *
- * A handle to the new node will be returned in \p phGraphNode.
- *
- * \param phGraphNode - Returns newly created node
- * \param hGraph - Graph to which to add the node
- * \param dependencies - Dependencies of the node
- * \param dependencyData - Optional edge data for the dependencies. If NULL, the data is
- * assumed to be default (zeroed) for all dependencies.
- * \param numDependencies - Number of dependencies
- * \param nodeParams - Specification of the node
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphCreate,
- * ::cuGraphNodeSetParams,
- * ::cuGraphExecNodeSetParams
- */
- CUresult CUDAAPI cuGraphAddNode_v2(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, const CUgraphEdgeData *dependencyData, size_t numDependencies, CUgraphNodeParams *nodeParams);
- /**
- * \brief Updates a graph node's parameters
- *
- * Sets the parameters of graph node \p hNode to \p nodeParams. The node type specified by
- * \p nodeParams->type must match the type of \p hNode. \p nodeParams must be fully
- * initialized and all unused bytes (reserved, padding) zeroed.
- *
- * Modifying parameters is not supported for node types CU_GRAPH_NODE_TYPE_MEM_ALLOC and
- * CU_GRAPH_NODE_TYPE_MEM_FREE.
- *
- * \param hNode - Node to set the parameters for
- * \param nodeParams - Parameters to copy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphExecNodeSetParams
- */
- CUresult CUDAAPI cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams *nodeParams);
- /**
- * \brief Updates a graph node's parameters in an instantiated graph
- *
- * Sets the parameters of a node in an executable graph \p hGraphExec. The node is identified
- * by the corresponding node \p hNode in the non-executable graph from which the executable
- * graph was instantiated. \p hNode must not have been removed from the original graph.
- *
- * The modifications only affect future launches of \p hGraphExec. Already
- * enqueued or running launches of \p hGraphExec are not affected by this call.
- * \p hNode is also not modified by this call.
- *
- * Allowed changes to parameters on executable graphs are as follows:
- * <table>
- * <tr><th>Node type<th>Allowed changes
- * <tr><td>kernel<td>See ::cuGraphExecKernelNodeSetParams
- * <tr><td>memcpy<td>Addresses for 1-dimensional copies if allocated in same context; see ::cuGraphExecMemcpyNodeSetParams
- * <tr><td>memset<td>Addresses for 1-dimensional memsets if allocated in same context; see ::cuGraphExecMemsetNodeSetParams
- * <tr><td>host<td>Unrestricted
- * <tr><td>child graph<td>Topology must match and restrictions apply recursively; see ::cuGraphExecUpdate
- * <tr><td>event wait<td>Unrestricted
- * <tr><td>event record<td>Unrestricted
- * <tr><td>external semaphore signal<td>Number of semaphore operations cannot change
- * <tr><td>external semaphore wait<td>Number of semaphore operations cannot change
- * <tr><td>memory allocation<td>API unsupported
- * <tr><td>memory free<td>API unsupported
- * <tr><td>batch memops<td>Addresses, values, and operation type for wait operations; see ::cuGraphExecBatchMemOpNodeSetParams
- * </table>
- *
- * \param hGraphExec - The executable graph in which to update the specified node
- * \param hNode - Corresponding node from the graph from which graphExec was instantiated
- * \param nodeParams - Updated Parameters to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode,
- * ::cuGraphNodeSetParams,
- * ::cuGraphExecUpdate,
- * ::cuGraphInstantiate
- */
- CUresult CUDAAPI cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams *nodeParams);
- /**
- * \brief Create a conditional handle
- *
- * Creates a conditional handle associated with \p hGraph.
- *
- * The conditional handle must be associated with a conditional node in this graph or one of its children.
- *
- * Handles not associated with a conditional node may cause graph instantiation to fail.
- *
- * Handles can only be set from the context with which they are associated.
- *
- * \param pHandle_out - Pointer used to return the handle to the caller.
- * \param hGraph - Graph which will contain the conditional node using this handle.
- * \param ctx - Context for the handle and associated conditional node.
- * \param defaultLaunchValue - Optional initial value for the conditional variable.
- * Applied at the beginning of each graph execution if CU_GRAPH_COND_ASSIGN_DEFAULT is set in \p flags.
- * \param flags - Currently must be CU_GRAPH_COND_ASSIGN_DEFAULT or 0.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_graph_thread_safety
- * \notefnerr
- *
- * \sa
- * ::cuGraphAddNode
- */
- CUresult CUDAAPI cuGraphConditionalHandleCreate(CUgraphConditionalHandle *pHandle_out, CUgraph hGraph, CUcontext ctx, unsigned int defaultLaunchValue, unsigned int flags);
- /** @} */ /* END CUDA_GRAPH */
- /**
- * \defgroup CUDA_OCCUPANCY Occupancy
- *
- * ___MANBRIEF___ occupancy calculation functions of the low-level CUDA driver
- * API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the occupancy calculation functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns occupancy of a function
- *
- * Returns in \p *numBlocks the number of the maximum active blocks per
- * streaming multiprocessor.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will be the current context.
- *
- * \param numBlocks - Returned occupancy
- * \param func - Kernel for which occupancy is calculated
- * \param blockSize - Block size the kernel is intended to be launched with
- * \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaOccupancyMaxActiveBlocksPerMultiprocessor
- */
- CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
- /**
- * \brief Returns occupancy of a function
- *
- * Returns in \p *numBlocks the number of the maximum active blocks per
- * streaming multiprocessor.
- *
- * The \p Flags parameter controls how special cases are handled. The
- * valid flags are:
- *
- * - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as
- * ::cuOccupancyMaxActiveBlocksPerMultiprocessor;
- *
- * - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the
- * default behavior on platforms where global caching affects
- * occupancy. On such platforms, if caching is enabled, but
- * per-block SM resource usage would result in zero occupancy, the
- * occupancy calculator will calculate the occupancy as if caching
- * is disabled. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE makes
- * the occupancy calculator return 0 in such cases. More information
- * can be found about this feature in the "Unified L1/Texture Cache"
- * section of the Maxwell tuning guide.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will be the current context.
- *
- * \param numBlocks - Returned occupancy
- * \param func - Kernel for which occupancy is calculated
- * \param blockSize - Block size the kernel is intended to be launched with
- * \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes
- * \param flags - Requested behavior for the occupancy calculator
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
- */
- CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
- /**
- * \brief Suggest a launch configuration with reasonable occupancy
- *
- * Returns in \p *blockSize a reasonable block size that can achieve
- * the maximum occupancy (or, the maximum number of active warps with
- * the fewest blocks per multiprocessor), and in \p *minGridSize the
- * minimum grid size to achieve the maximum occupancy.
- *
- * If \p blockSizeLimit is 0, the configurator will use the maximum
- * block size permitted by the device / function instead.
- *
- * If per-block dynamic shared memory allocation is not needed, the
- * user should leave both \p blockSizeToDynamicSMemSize and \p
- * dynamicSMemSize as 0.
- *
- * If per-block dynamic shared memory allocation is needed, then if
- * the dynamic shared memory size is constant regardless of block
- * size, the size should be passed through \p dynamicSMemSize, and \p
- * blockSizeToDynamicSMemSize should be NULL.
- *
- * Otherwise, if the per-block dynamic shared memory size varies with
- * different block sizes, the user needs to provide a unary function
- * through \p blockSizeToDynamicSMemSize that computes the dynamic
- * shared memory needed by \p func for any given block size. \p
- * dynamicSMemSize is ignored. An example signature is:
- *
- * \code
- * // Take block size, returns dynamic shared memory needed
- * size_t blockToSmem(int blockSize);
- * \endcode
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will be the current context.
- *
- * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
- * \param blockSize - Returned maximum block size that can achieve the maximum occupancy
- * \param func - Kernel for which launch configuration is calculated
- * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
- * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
- * \param blockSizeLimit - The maximum block size \p func is designed to handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaOccupancyMaxPotentialBlockSize
- */
- CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
- /**
- * \brief Suggest a launch configuration with reasonable occupancy
- *
- * An extended version of ::cuOccupancyMaxPotentialBlockSize. In
- * addition to arguments passed to ::cuOccupancyMaxPotentialBlockSize,
- * ::cuOccupancyMaxPotentialBlockSizeWithFlags also takes a \p flags
- * parameter.
- *
- * The \p flags parameter controls how special cases are handled. The
- * valid flags are:
- *
- * - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as
- * ::cuOccupancyMaxPotentialBlockSize;
- *
- * - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the
- * default behavior on platforms where global caching affects
- * occupancy. On such platforms, the launch configuration that
- * produces maximal occupancy might not support global
- * caching. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE
- * guarantees that the produced launch configuration is global
- * caching compatible at a potential cost of occupancy. More information
- * can be found about this feature in the "Unified L1/Texture Cache"
- * section of the Maxwell tuning guide.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will be the current context.
- *
- * \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy
- * \param blockSize - Returned maximum block size that can achieve the maximum occupancy
- * \param func - Kernel for which launch configuration is calculated
- * \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size
- * \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes
- * \param blockSizeLimit - The maximum block size \p func is designed to handle
- * \param flags - Options
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaOccupancyMaxPotentialBlockSizeWithFlags
- */
- CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
- /**
- * \brief Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM
- *
- * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will be the current context.
- *
- * \param dynamicSmemSize - Returned maximum dynamic shared memory
- * \param func - Kernel function for which occupancy is calculated
- * \param numBlocks - Number of blocks to fit on SM
- * \param blockSize - Size of the blocks
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- */
- CUresult CUDAAPI cuOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize);
- /**
- * \brief Given the kernel function (\p func) and launch configuration
- * (\p config), return the maximum cluster size in \p *clusterSize.
- *
- * The cluster dimensions in \p config are ignored. If \p func has a required
- * cluster size set (see ::cudaFuncGetAttributes / ::cuFuncGetAttribute), \p
- * *clusterSize will reflect the required cluster size.
- *
- * By default this function will always return a value that's portable on
- * future hardware. A higher value may be returned if the kernel function
- * allows non-portable cluster sizes.
- *
- * This function will respect the compile time launch bounds.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will either be taken from the specified stream \p config->hStream
- * or the current context in case of NULL stream.
- *
- * \param clusterSize - Returned maximum cluster size that can be launched
- * for the given kernel function and launch configuration
- * \param func - Kernel function for which maximum cluster
- * size is calculated
- * \param config - Launch configuration for the given kernel function
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaFuncGetAttributes,
- * ::cuFuncGetAttribute
- */
- CUresult CUDAAPI cuOccupancyMaxPotentialClusterSize(int *clusterSize, CUfunction func, const CUlaunchConfig *config);
- /**
- * \brief Given the kernel function (\p func) and launch configuration
- * (\p config), return the maximum number of clusters that could co-exist
- * on the target device in \p *numClusters.
- *
- * If the function has required cluster size already set (see
- * ::cudaFuncGetAttributes / ::cuFuncGetAttribute), the cluster size
- * from config must either be unspecified or match the required size.
- * Without required sizes, the cluster size must be specified in config,
- * else the function will return an error.
- *
- * Note that various attributes of the kernel function may affect occupancy
- * calculation. Runtime environment may affect how the hardware schedules
- * the clusters, so the calculated occupancy is not guaranteed to be achievable.
- *
- * Note that the API can also be used with context-less kernel ::CUkernel
- * by querying the handle using ::cuLibraryGetKernel() and then passing it
- * to the API by casting to ::CUfunction. Here, the context to use for calculations
- * will either be taken from the specified stream \p config->hStream
- * or the current context in case of NULL stream.
- *
- * \param numClusters - Returned maximum number of clusters that
- * could co-exist on the target device
- * \param func - Kernel function for which maximum number
- * of clusters is calculated
- * \param config - Launch configuration for the given kernel function
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_CLUSTER_SIZE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cudaFuncGetAttributes,
- * ::cuFuncGetAttribute
- */
- CUresult CUDAAPI cuOccupancyMaxActiveClusters(int *numClusters, CUfunction func, const CUlaunchConfig *config);
- /** @} */ /* END CUDA_OCCUPANCY */
- /**
- * \defgroup CUDA_TEXREF_DEPRECATED Texture Reference Management [DEPRECATED]
- *
- * ___MANBRIEF___ deprecated texture reference management functions of the
- * low-level CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the deprecated texture reference management
- * functions of the low-level CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Binds an array as a texture reference
- *
- * \deprecated
- *
- * Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any
- * previous address or CUDA array state associated with the texture reference
- * is superseded by this function. \p Flags must be set to
- * ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef is
- * unbound.
- *
- * \param hTexRef - Texture reference to bind
- * \param hArray - Array to bind
- * \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
- /**
- * \brief Binds a mipmapped array to a texture reference
- *
- * \deprecated
- *
- * Binds the CUDA mipmapped array \p hMipmappedArray to the texture reference \p hTexRef.
- * Any previous address or CUDA array state associated with the texture reference
- * is superseded by this function. \p Flags must be set to ::CU_TRSA_OVERRIDE_FORMAT.
- * Any CUDA array previously bound to \p hTexRef is unbound.
- *
- * \param hTexRef - Texture reference to bind
- * \param hMipmappedArray - Mipmapped array to bind
- * \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT)
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
- /**
- * \brief Binds an address as a texture reference
- *
- * \deprecated
- *
- * Binds a linear address range to the texture reference \p hTexRef. Any
- * previous address or CUDA array state associated with the texture reference
- * is superseded by this function. Any memory previously bound to \p hTexRef
- * is unbound.
- *
- * Since the hardware enforces an alignment requirement on texture base
- * addresses, ::cuTexRefSetAddress() passes back a byte offset in
- * \p *ByteOffset that must be applied to texture fetches in order to read from
- * the desired memory. This offset must be divided by the texel size and
- * passed to kernels that read from the texture so it can be applied to the
- * ::tex1Dfetch() function.
- *
- * If the device memory pointer was returned from ::cuMemAlloc(), the offset
- * is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parameter.
- *
- * The total number of elements (or texels) in the linear address range
- * cannot exceed ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH.
- * The number of elements is computed as (\p bytes / bytesPerElement),
- * where bytesPerElement is determined from the data format and number of
- * components set using ::cuTexRefSetFormat().
- *
- * \param ByteOffset - Returned byte offset
- * \param hTexRef - Texture reference to bind
- * \param dptr - Device pointer to bind
- * \param bytes - Size of memory to bind in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
- /**
- * \brief Binds an address as a 2D texture reference
- *
- * \deprecated
- *
- * Binds a linear address range to the texture reference \p hTexRef. Any
- * previous address or CUDA array state associated with the texture reference
- * is superseded by this function. Any memory previously bound to \p hTexRef
- * is unbound.
- *
- * Using a ::tex2D() function inside a kernel requires a call to either
- * ::cuTexRefSetArray() to bind the corresponding texture reference to an
- * array, or ::cuTexRefSetAddress2D() to bind the texture reference to linear
- * memory.
- *
- * Function calls to ::cuTexRefSetFormat() cannot follow calls to
- * ::cuTexRefSetAddress2D() for the same texture reference.
- *
- * It is required that \p dptr be aligned to the appropriate hardware-specific
- * texture alignment. You can query this value using the device attribute
- * ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is
- * supplied, ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \p Pitch has to be aligned to the hardware-specific texture pitch alignment.
- * This value can be queried using the device attribute
- * ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. If an unaligned \p Pitch is
- * supplied, ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * Width and Height, which are specified in elements (or texels), cannot exceed
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively.
- * \p Pitch, which is specified in bytes, cannot exceed
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
- *
- * \param hTexRef - Texture reference to bind
- * \param desc - Descriptor of CUDA array
- * \param dptr - Device pointer to bind
- * \param Pitch - Line pitch in bytes
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
- /**
- * \brief Sets the format for a texture reference
- *
- * \deprecated
- *
- * Specifies the format of the data to be read by the texture reference
- * \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to the
- * ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR structure:
- * They specify the format of each component and the number of components per
- * array element.
- *
- * \param hTexRef - Texture reference
- * \param fmt - Format to set
- * \param NumPackedComponents - Number of components per array element
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat,
- * ::cudaCreateChannelDesc
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
- /**
- * \brief Sets the addressing mode for a texture reference
- *
- * \deprecated
- *
- * Specifies the addressing mode \p am for the given dimension \p dim of the
- * texture reference \p hTexRef. If \p dim is zero, the addressing mode is
- * applied to the first parameter of the functions used to fetch from the
- * texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defined
- * as:
- * \code
- typedef enum CUaddress_mode_enum {
- CU_TR_ADDRESS_MODE_WRAP = 0,
- CU_TR_ADDRESS_MODE_CLAMP = 1,
- CU_TR_ADDRESS_MODE_MIRROR = 2,
- CU_TR_ADDRESS_MODE_BORDER = 3
- } CUaddress_mode;
- * \endcode
- *
- * Note that this call has no effect if \p hTexRef is bound to linear memory.
- * Also, if the flag, ::CU_TRSF_NORMALIZED_COORDINATES, is not set, the only
- * supported address mode is ::CU_TR_ADDRESS_MODE_CLAMP.
- *
- * \param hTexRef - Texture reference
- * \param dim - Dimension
- * \param am - Addressing mode to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
- /**
- * \brief Sets the filtering mode for a texture reference
- *
- * \deprecated
- *
- * Specifies the filtering mode \p fm to be used when reading memory through
- * the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as:
- *
- * \code
- typedef enum CUfilter_mode_enum {
- CU_TR_FILTER_MODE_POINT = 0,
- CU_TR_FILTER_MODE_LINEAR = 1
- } CUfilter_mode;
- * \endcode
- *
- * Note that this call has no effect if \p hTexRef is bound to linear memory.
- *
- * \param hTexRef - Texture reference
- * \param fm - Filtering mode to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
- /**
- * \brief Sets the mipmap filtering mode for a texture reference
- *
- * \deprecated
- *
- * Specifies the mipmap filtering mode \p fm to be used when reading memory through
- * the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as:
- *
- * \code
- typedef enum CUfilter_mode_enum {
- CU_TR_FILTER_MODE_POINT = 0,
- CU_TR_FILTER_MODE_LINEAR = 1
- } CUfilter_mode;
- * \endcode
- *
- * Note that this call has no effect if \p hTexRef is not bound to a mipmapped array.
- *
- * \param hTexRef - Texture reference
- * \param fm - Filtering mode to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
- /**
- * \brief Sets the mipmap level bias for a texture reference
- *
- * \deprecated
- *
- * Specifies the mipmap level bias \p bias to be added to the specified mipmap level when
- * reading memory through the texture reference \p hTexRef.
- *
- * Note that this call has no effect if \p hTexRef is not bound to a mipmapped array.
- *
- * \param hTexRef - Texture reference
- * \param bias - Mipmap level bias
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
- /**
- * \brief Sets the min/max mipmap level clamps for a texture reference
- *
- * \deprecated
- *
- * Specifies the min/max mipmap level clamps, \p minMipmapLevelClamp and \p maxMipmapLevelClamp
- * respectively, to be used when reading memory through the texture reference
- * \p hTexRef.
- *
- * Note that this call has no effect if \p hTexRef is not bound to a mipmapped array.
- *
- * \param hTexRef - Texture reference
- * \param minMipmapLevelClamp - Mipmap min level clamp
- * \param maxMipmapLevelClamp - Mipmap max level clamp
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
- /**
- * \brief Sets the maximum anisotropy for a texture reference
- *
- * \deprecated
- *
- * Specifies the maximum anisotropy \p maxAniso to be used when reading memory through
- * the texture reference \p hTexRef.
- *
- * Note that this call has no effect if \p hTexRef is bound to linear memory.
- *
- * \param hTexRef - Texture reference
- * \param maxAniso - Maximum anisotropy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
- /**
- * \brief Sets the border color for a texture reference
- *
- * \deprecated
- *
- * Specifies the RGBA border color, via \p pBorderColor, for the texture reference
- * \p hTexRef. The color value supports only float type and holds color components in
- * the following sequence:
- * pBorderColor[0] holds 'R' component
- * pBorderColor[1] holds 'G' component
- * pBorderColor[2] holds 'B' component
- * pBorderColor[3] holds 'A' component
- *
- * Note that the color values can be set only when the Address mode is set to
- * ::CU_TR_ADDRESS_MODE_BORDER using ::cuTexRefSetAddressMode.
- * Applications using integer border color values have to "reinterpret_cast" their values to float.
- *
- * \param hTexRef - Texture reference
- * \param pBorderColor - RGBA color
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddressMode,
- * ::cuTexRefGetAddressMode, ::cuTexRefGetBorderColor
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor);
- /**
- * \brief Sets the flags for a texture reference
- *
- * \deprecated
- *
- * Specifies optional flags via \p Flags to specify the behavior of data
- * returned through the texture reference \p hTexRef. The valid flags are:
- *
- * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
- * having the texture promote integer data to floating point data in the
- * range [0, 1]. Note that textures with 32-bit integer format
- * are not promoted, regardless of whether or not this
- * flag is specified;
- * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the
- * default behavior of having the texture coordinates range
- * from [0, Dim) where Dim is the width or height of the CUDA
- * array. Instead, the texture coordinates [0, 1.0) reference
- * the entire breadth of the array dimension;
- * - ::CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION, which disables any trilinear
- * filtering optimizations. Trilinear optimizations improve texture filtering
- * performance by allowing bilinear filtering on textures in scenarios where
- * it can closely approximate the expected results.
- *
- * \param hTexRef - Texture reference
- * \param Flags - Optional flags to set
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
- /**
- * \brief Gets the address associated with a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pdptr the base address bound to the texture reference
- * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
- * is not bound to any device memory range.
- *
- * \param pdptr - Returned device address
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
- /**
- * \brief Gets the array bound to a texture reference
- *
- * \deprecated
- *
- * Returns in \p *phArray the CUDA array bound to the texture reference
- * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
- * is not bound to any CUDA array.
- *
- * \param phArray - Returned array
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
- /**
- * \brief Gets the mipmapped array bound to a texture reference
- *
- * \deprecated
- *
- * Returns in \p *phMipmappedArray the CUDA mipmapped array bound to the texture
- * reference \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
- * is not bound to any CUDA mipmapped array.
- *
- * \param phMipmappedArray - Returned mipmapped array
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, CUtexref hTexRef);
- /**
- * \brief Gets the addressing mode used by a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pam the addressing mode corresponding to the
- * dimension \p dim of the texture reference \p hTexRef. Currently, the only
- * valid values for \p dim are 0 and 1.
- *
- * \param pam - Returned addressing mode
- * \param hTexRef - Texture reference
- * \param dim - Dimension
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
- /**
- * \brief Gets the filter-mode used by a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pfm the filtering mode of the texture reference
- * \p hTexRef.
- *
- * \param pfm - Returned filtering mode
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
- /**
- * \brief Gets the format used by a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pFormat and \p *pNumChannels the format and number
- * of components of the CUDA array bound to the texture reference \p hTexRef.
- * If \p pFormat or \p pNumChannels is NULL, it will be ignored.
- *
- * \param pFormat - Returned format
- * \param pNumChannels - Returned number of components
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
- /**
- * \brief Gets the mipmap filtering mode for a texture reference
- *
- * \deprecated
- *
- * Returns the mipmap filtering mode in \p pfm that's used when reading memory through
- * the texture reference \p hTexRef.
- *
- * \param pfm - Returned mipmap filtering mode
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
- /**
- * \brief Gets the mipmap level bias for a texture reference
- *
- * \deprecated
- *
- * Returns the mipmap level bias in \p pbias that's added to the specified mipmap
- * level when reading memory through the texture reference \p hTexRef.
- *
- * \param pbias - Returned mipmap level bias
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef);
- /**
- * \brief Gets the min/max mipmap level clamps for a texture reference
- *
- * \deprecated
- *
- * Returns the min/max mipmap level clamps in \p pminMipmapLevelClamp and \p pmaxMipmapLevelClamp
- * that are used when reading memory through the texture reference \p hTexRef.
- *
- * \param pminMipmapLevelClamp - Returned mipmap min level clamp
- * \param pmaxMipmapLevelClamp - Returned mipmap max level clamp
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, float *pmaxMipmapLevelClamp, CUtexref hTexRef);
- /**
- * \brief Gets the maximum anisotropy for a texture reference
- *
- * \deprecated
- *
- * Returns the maximum anisotropy in \p pmaxAniso that's used when reading memory through
- * the texture reference \p hTexRef.
- *
- * \param pmaxAniso - Returned maximum anisotropy
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef);
- /**
- * \brief Gets the border color used by a texture reference
- *
- * \deprecated
- *
- * Returns in \p pBorderColor, values of the RGBA color used by
- * the texture reference \p hTexRef.
- * The color value is of type float and holds color components in
- * the following sequence:
- * pBorderColor[0] holds 'R' component
- * pBorderColor[1] holds 'G' component
- * pBorderColor[2] holds 'B' component
- * pBorderColor[3] holds 'A' component
- *
- * \param hTexRef - Texture reference
- * \param pBorderColor - Returned Type and Value of RGBA color
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddressMode,
- * ::cuTexRefSetBorderColor
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef);
- /**
- * \brief Gets the flags used by a texture reference
- *
- * \deprecated
- *
- * Returns in \p *pFlags the flags of the texture reference \p hTexRef.
- *
- * \param pFlags - Returned flags
- * \param hTexRef - Texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefSetAddress,
- * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
- * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
- * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
- * ::cuTexRefGetFilterMode, ::cuTexRefGetFormat
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
- /**
- * \brief Creates a texture reference
- *
- * \deprecated
- *
- * Creates a texture reference and returns its handle in \p *pTexRef. Once
- * created, the application must call ::cuTexRefSetArray() or
- * ::cuTexRefSetAddress() to associate the reference with allocated memory.
- * Other texture reference functions are used to specify the format and
- * interpretation (addressing, filtering, etc.) to be used when the memory is
- * read through this texture reference.
- *
- * \param pTexRef - Returned texture reference
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefDestroy
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef);
- /**
- * \brief Destroys a texture reference
- *
- * \deprecated
- *
- * Destroys the texture reference specified by \p hTexRef.
- *
- * \param hTexRef - Texture reference to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuTexRefCreate
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef);
- /** @} */ /* END CUDA_TEXREF_DEPRECATED */
- /**
- * \defgroup CUDA_SURFREF_DEPRECATED Surface Reference Management [DEPRECATED]
- *
- * ___MANBRIEF___ surface reference management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the surface reference management functions of the
- * low-level CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Sets the CUDA array for a surface reference.
- *
- * \deprecated
- *
- * Sets the CUDA array \p hArray to be read and written by the surface reference
- * \p hSurfRef. Any previous CUDA array state associated with the surface
- * reference is superseded by this function. \p Flags must be set to 0.
- * The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA array.
- * Any CUDA array previously bound to \p hSurfRef is unbound.
- * \param hSurfRef - Surface reference handle
- * \param hArray - CUDA array handle
- * \param Flags - set to 0
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuModuleGetSurfRef,
- * ::cuSurfRefGetArray
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
- /**
- * \brief Passes back the CUDA array bound to a surface reference.
- *
- * \deprecated
- *
- * Returns in \p *phArray the CUDA array bound to the surface reference
- * \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface reference
- * is not bound to any CUDA array.
- * \param phArray - Returned CUDA array
- * \param hSurfRef - Surface reference handle
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray
- */
- __CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
- /** @} */ /* END CUDA_SURFREF_DEPRECATED */
- /**
- * \defgroup CUDA_TEXOBJECT Texture Object Management
- *
- * ___MANBRIEF___ texture object management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the texture object management functions of the
- * low-level CUDA driver application programming interface. The texture
- * object API is only supported on devices of compute capability 3.0 or higher.
- *
- * @{
- */
- /**
- * \brief Creates a texture object
- *
- * Creates a texture object and returns it in \p pTexObject. \p pResDesc describes
- * the data to texture from. \p pTexDesc describes how the data should be sampled.
- * \p pResViewDesc is an optional argument that specifies an alternate format for
- * the data described by \p pResDesc, and also describes the subresource region
- * to restrict access to when texturing. \p pResViewDesc can only be specified if
- * the type of resource is a CUDA array or a CUDA mipmapped array not in a block
- * compressed format.
- *
- * Texture objects are only supported on devices of compute capability 3.0 or higher.
- * Additionally, a texture object is an opaque value, and, as such, should only be
- * accessed through CUDA API calls.
- *
- * The ::CUDA_RESOURCE_DESC structure is defined as:
- * \code
- typedef struct CUDA_RESOURCE_DESC_st
- {
- CUresourcetype resType;
- union {
- struct {
- CUarray hArray;
- } array;
- struct {
- CUmipmappedArray hMipmappedArray;
- } mipmap;
- struct {
- CUdeviceptr devPtr;
- CUarray_format format;
- unsigned int numChannels;
- size_t sizeInBytes;
- } linear;
- struct {
- CUdeviceptr devPtr;
- CUarray_format format;
- unsigned int numChannels;
- size_t width;
- size_t height;
- size_t pitchInBytes;
- } pitch2D;
- } res;
- unsigned int flags;
- } CUDA_RESOURCE_DESC;
- * \endcode
- * where:
- * - ::CUDA_RESOURCE_DESC::resType specifies the type of resource to texture from.
- * CUresourcetype is defined as:
- * \code
- typedef enum CUresourcetype_enum {
- CU_RESOURCE_TYPE_ARRAY = 0x00,
- CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
- CU_RESOURCE_TYPE_LINEAR = 0x02,
- CU_RESOURCE_TYPE_PITCH2D = 0x03
- } CUresourcetype;
- * \endcode
- *
- * \par
- * If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_ARRAY, ::CUDA_RESOURCE_DESC::res::array::hArray
- * must be set to a valid CUDA array handle.
- *
- * \par
- * If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, ::CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray
- * must be set to a valid CUDA mipmapped array handle.
- *
- * \par
- * If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_LINEAR, ::CUDA_RESOURCE_DESC::res::linear::devPtr
- * must be set to a valid device pointer, that is aligned to ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT.
- * ::CUDA_RESOURCE_DESC::res::linear::format and ::CUDA_RESOURCE_DESC::res::linear::numChannels
- * describe the format of each component and the number of components per array element. ::CUDA_RESOURCE_DESC::res::linear::sizeInBytes
- * specifies the size of the array in bytes. The total number of elements in the linear address range cannot exceed
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of elements is computed as (sizeInBytes / (sizeof(format) * numChannels)).
- *
- * \par
- * If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_PITCH2D, ::CUDA_RESOURCE_DESC::res::pitch2D::devPtr
- * must be set to a valid device pointer, that is aligned to ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT.
- * ::CUDA_RESOURCE_DESC::res::pitch2D::format and ::CUDA_RESOURCE_DESC::res::pitch2D::numChannels
- * describe the format of each component and the number of components per array element. ::CUDA_RESOURCE_DESC::res::pitch2D::width
- * and ::CUDA_RESOURCE_DESC::res::pitch2D::height specify the width and height of the array in elements, and cannot exceed
- * ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively.
- * ::CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to
- * ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH.
- *
- * - ::CUDA_RESOURCE_DESC::flags must be set to zero.
- *
- *
- * The ::CUDA_TEXTURE_DESC struct is defined as
- * \code
- typedef struct CUDA_TEXTURE_DESC_st {
- CUaddress_mode addressMode[3];
- CUfilter_mode filterMode;
- unsigned int flags;
- unsigned int maxAnisotropy;
- CUfilter_mode mipmapFilterMode;
- float mipmapLevelBias;
- float minMipmapLevelClamp;
- float maxMipmapLevelClamp;
- } CUDA_TEXTURE_DESC;
- * \endcode
- * where
- * - ::CUDA_TEXTURE_DESC::addressMode specifies the addressing mode for each dimension of the texture data. ::CUaddress_mode is defined as:
- * \code
- typedef enum CUaddress_mode_enum {
- CU_TR_ADDRESS_MODE_WRAP = 0,
- CU_TR_ADDRESS_MODE_CLAMP = 1,
- CU_TR_ADDRESS_MODE_MIRROR = 2,
- CU_TR_ADDRESS_MODE_BORDER = 3
- } CUaddress_mode;
- * \endcode
- * This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR. Also, if the flag, ::CU_TRSF_NORMALIZED_COORDINATES
- * is not set, the only supported address mode is ::CU_TR_ADDRESS_MODE_CLAMP.
- *
- * - ::CUDA_TEXTURE_DESC::filterMode specifies the filtering mode to be used when fetching from the texture. CUfilter_mode is defined as:
- * \code
- typedef enum CUfilter_mode_enum {
- CU_TR_FILTER_MODE_POINT = 0,
- CU_TR_FILTER_MODE_LINEAR = 1
- } CUfilter_mode;
- * \endcode
- * This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR.
- *
- * - ::CUDA_TEXTURE_DESC::flags can be any combination of the following:
- * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
- * having the texture promote integer data to floating point data in the
- * range [0, 1]. Note that texture with 32-bit integer format would not be
- * promoted, regardless of whether or not this flag is specified.
- * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior
- * of having the texture coordinates range from [0, Dim) where Dim is the
- * width or height of the CUDA array. Instead, the texture coordinates
- * [0, 1.0) reference the entire breadth of the array dimension; Note that
- * for CUDA mipmapped arrays, this flag has to be set.
- * - ::CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION, which disables any trilinear
- * filtering optimizations. Trilinear optimizations improve texture filtering
- * performance by allowing bilinear filtering on textures in scenarios where
- * it can closely approximate the expected results.
- * - ::CU_TRSF_SEAMLESS_CUBEMAP, which enables seamless cube map filtering.
- * This flag can only be specified if the underlying resource is a CUDA array
- * or a CUDA mipmapped array that was created with the flag ::CUDA_ARRAY3D_CUBEMAP.
- * When seamless cube map filtering is enabled, texture address modes specified
- * by ::CUDA_TEXTURE_DESC::addressMode are ignored. Instead, if the ::CUDA_TEXTURE_DESC::filterMode
- * is set to ::CU_TR_FILTER_MODE_POINT the address mode ::CU_TR_ADDRESS_MODE_CLAMP
- * will be applied for all dimensions. If the ::CUDA_TEXTURE_DESC::filterMode is
- * set to ::CU_TR_FILTER_MODE_LINEAR seamless cube map filtering will be performed
- * when sampling along the cube face borders.
- *
- * - ::CUDA_TEXTURE_DESC::maxAnisotropy specifies the maximum anisotropy ratio to be used when doing anisotropic filtering. This value will be
- * clamped to the range [1,16].
- *
- * - ::CUDA_TEXTURE_DESC::mipmapFilterMode specifies the filter mode when the calculated mipmap level lies between two defined mipmap levels.
- *
- * - ::CUDA_TEXTURE_DESC::mipmapLevelBias specifies the offset to be applied to the calculated mipmap level.
- *
- * - ::CUDA_TEXTURE_DESC::minMipmapLevelClamp specifies the lower end of the mipmap level range to clamp access to.
- *
- * - ::CUDA_TEXTURE_DESC::maxMipmapLevelClamp specifies the upper end of the mipmap level range to clamp access to.
- *
- *
- * The ::CUDA_RESOURCE_VIEW_DESC struct is defined as
- * \code
- typedef struct CUDA_RESOURCE_VIEW_DESC_st
- {
- CUresourceViewFormat format;
- size_t width;
- size_t height;
- size_t depth;
- unsigned int firstMipmapLevel;
- unsigned int lastMipmapLevel;
- unsigned int firstLayer;
- unsigned int lastLayer;
- } CUDA_RESOURCE_VIEW_DESC;
- * \endcode
- * where:
- * - ::CUDA_RESOURCE_VIEW_DESC::format specifies how the data contained in the CUDA array or CUDA mipmapped array should
- * be interpreted. Note that this can incur a change in size of the texture data. If the resource view format is a block
- * compressed format, then the underlying CUDA array or CUDA mipmapped array has to have a base of format ::CU_AD_FORMAT_UNSIGNED_INT32
- * with 2 or 4 channels, depending on the block compressed format. For ex., BC1 and BC4 require the underlying CUDA array to have
- * a format of ::CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats require the underlying resource to have the same base
- * format but with 4 channels.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::width specifies the new width of the texture data. If the resource view format is a block
- * compressed format, this value has to be 4 times the original width of the resource. For non block compressed formats,
- * this value has to be equal to that of the original resource.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::height specifies the new height of the texture data. If the resource view format is a block
- * compressed format, this value has to be 4 times the original height of the resource. For non block compressed formats,
- * this value has to be equal to that of the original resource.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::depth specifies the new depth of the texture data. This value has to be equal to that of the
- * original resource.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed mipmap level. This will be the new mipmap level zero.
- * For non-mipmapped resources, this value has to be zero. ::CUDA_TEXTURE_DESC::minMipmapLevelClamp and ::CUDA_TEXTURE_DESC::maxMipmapLevelClamp
- * will be relative to this value. For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp of 1.2 is specified,
- * then the actual minimum mipmap level clamp will be 3.2.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel specifies the least detailed mipmap level. For non-mipmapped resources, this value
- * has to be zero.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::firstLayer specifies the first layer index for layered textures. This will be the new layer zero.
- * For non-layered resources, this value has to be zero.
- *
- * - ::CUDA_RESOURCE_VIEW_DESC::lastLayer specifies the last layer index for layered textures. For non-layered resources,
- * this value has to be zero.
- *
- *
- * \param pTexObject - Texture object to create
- * \param pResDesc - Resource descriptor
- * \param pTexDesc - Texture descriptor
- * \param pResViewDesc - Resource view descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexObjectDestroy,
- * ::cudaCreateTextureObject
- */
- CUresult CUDAAPI cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, const CUDA_TEXTURE_DESC *pTexDesc, const CUDA_RESOURCE_VIEW_DESC *pResViewDesc);
- /**
- * \brief Destroys a texture object
- *
- * Destroys the texture object specified by \p texObject.
- *
- * \param texObject - Texture object to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexObjectCreate,
- * ::cudaDestroyTextureObject
- */
- CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject);
- /**
- * \brief Returns a texture object's resource descriptor
- *
- * Returns the resource descriptor for the texture object specified by \p texObject.
- *
- * \param pResDesc - Resource descriptor
- * \param texObject - Texture object
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexObjectCreate,
- * ::cudaGetTextureObjectResourceDesc
- */
- CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUtexObject texObject);
- /**
- * \brief Returns a texture object's texture descriptor
- *
- * Returns the texture descriptor for the texture object specified by \p texObject.
- *
- * \param pTexDesc - Texture descriptor
- * \param texObject - Texture object
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexObjectCreate,
- * ::cudaGetTextureObjectTextureDesc
- */
- CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, CUtexObject texObject);
- /**
- * \brief Returns a texture object's resource view descriptor
- *
- * Returns the resource view descriptor for the texture object specified by \p texObject.
- * If no resource view was set for \p texObject, ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \param pResViewDesc - Resource view descriptor
- * \param texObject - Texture object
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTexObjectCreate,
- * ::cudaGetTextureObjectResourceViewDesc
- */
- CUresult CUDAAPI cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject);
- /** @} */ /* END CUDA_TEXOBJECT */
- /**
- * \defgroup CUDA_SURFOBJECT Surface Object Management
- *
- * ___MANBRIEF___ surface object management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the surface object management functions of the
- * low-level CUDA driver application programming interface. The surface
- * object API is only supported on devices of compute capability 3.0 or higher.
- *
- * @{
- */
- /**
- * \brief Creates a surface object
- *
- * Creates a surface object and returns it in \p pSurfObject. \p pResDesc describes
- * the data to perform surface load/stores on. ::CUDA_RESOURCE_DESC::resType must be
- * ::CU_RESOURCE_TYPE_ARRAY and ::CUDA_RESOURCE_DESC::res::array::hArray
- * must be set to a valid CUDA array handle. ::CUDA_RESOURCE_DESC::flags must be set to zero.
- *
- * Surface objects are only supported on devices of compute capability 3.0 or higher.
- * Additionally, a surface object is an opaque value, and, as such, should only be
- * accessed through CUDA API calls.
- *
- * \param pSurfObject - Surface object to create
- * \param pResDesc - Resource descriptor
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuSurfObjectDestroy,
- * ::cudaCreateSurfaceObject
- */
- CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, const CUDA_RESOURCE_DESC *pResDesc);
- /**
- * \brief Destroys a surface object
- *
- * Destroys the surface object specified by \p surfObject.
- *
- * \param surfObject - Surface object to destroy
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuSurfObjectCreate,
- * ::cudaDestroySurfaceObject
- */
- CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject);
- /**
- * \brief Returns a surface object's resource descriptor
- *
- * Returns the resource descriptor for the surface object specified by \p surfObject.
- *
- * \param pResDesc - Resource descriptor
- * \param surfObject - Surface object
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuSurfObjectCreate,
- * ::cudaGetSurfaceObjectResourceDesc
- */
- CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUsurfObject surfObject);
- /** @} */ /* END CUDA_SURFOBJECT */
- /**
- * \defgroup CUDA_TENSOR_MEMORY Tensor Map Object Management
- *
- * ___MANBRIEF___ tensor map object management functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the tensor map object management functions of the
- * low-level CUDA driver application programming interface. The tensor
- * map object API is only supported on devices of compute capability 9.0 or higher.
- *
- * @{
- */
- /**
- * \brief Create a tensor map descriptor object representing tiled memory region
- *
- * Creates a descriptor for Tensor Memory Access (TMA) object specified
- * by the parameters describing a tiled region and returns it in \p tensorMap.
- *
- * Tensor map objects are only supported on devices of compute capability 9.0 or higher.
- * Additionally, a tensor map object is an opaque value, and, as such, should only be
- * accessed through CUDA APIs and PTX.
- *
- * The parameters passed are bound to the following requirements:
- *
- * - \p tensorMap address must be aligned to 64 bytes.
- *
- * - \p tensorDataType has to be an enum from ::CUtensorMapDataType which is defined as:
- * \code
- typedef enum CUtensorMapDataType_enum {
- CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0, // 1 byte
- CU_TENSOR_MAP_DATA_TYPE_UINT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B // 6 bits
- } CUtensorMapDataType;
- * \endcode
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B copies '16 x U4' packed values to memory aligned as 8 bytes. There are no gaps between packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B copies '16 x U4' packed values to memory aligned as 16 bytes. There are 8 byte gaps between every 8 byte chunk of packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B copies '16 x U6' packed values to memory aligned as 16 bytes. There are 4 byte gaps between every 12 byte chunk of packed values.
- *
- * - \p tensorRank must be non-zero and less than or equal to the maximum supported dimensionality of 5. If \p interleave is not
- * ::CU_TENSOR_MAP_INTERLEAVE_NONE, then \p tensorRank must additionally be greater than or equal to 3.
- *
- * - \p globalAddress, which specifies the starting address of the memory region described, must be 16 byte aligned. The following requirements need to also be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p globalAddress must be 32 byte aligned.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, \p globalAddress must be 32 byte aligned.
- *
- * - \p globalDim array, which specifies tensor size of each of the \p tensorRank dimensions, must be non-zero and less than or
- * equal to 2^32. Additionally, the following requirements need to be met for the packed data types:
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, globalDim[0] must be a multiple of 128.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, \p globalDim[0] must be a multiple of 2.
- * - Dimension for the packed data types must reflect the number of individual U# values.
- *
- * - \p globalStrides array, which specifies tensor stride of each of the lower \p tensorRank - 1 dimensions in bytes, must be a
- * multiple of 16 and less than 2^40. Additionally, the following requirements need to be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, the strides must be a multiple of 32.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, the strides must be a multiple of 32.
- * Each following dimension specified includes previous dimension stride:
- * \code
- globalStrides[0] = globalDim[0] * elementSizeInBytes(tensorDataType) + padding[0];
- for (i = 1; i < tensorRank - 1; i++) {
- globalStrides[i] = globalStrides[i - 1] * (globalDim[i] + padding[i]);
- assert(globalStrides[i] >= globalDim[i]);
- }
- * \endcode
- *
- * - \p boxDim array, which specifies number of elements to be traversed along each of the \p tensorRank dimensions, must be non-zero
- * and less than or equal to 256. Additionally, the following requirements need to be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, { \p boxDim[0] * elementSizeInBytes( \p tensorDataType ) } must be a multiple of 16 bytes.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, boxDim[0] must be 128.
- *
- * - \p elementStrides array, which specifies the iteration step along each of the \p tensorRank dimensions, must be non-zero and less
- * than or equal to 8. Note that when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the first element of this array is ignored since
- * TMA doesn’t support the stride for dimension zero.
- * When all elements of the \p elementStrides array are one, \p boxDim specifies the number of elements to load. However, if \p elementStrides[i]
- * is not equal to one for some \p i, then TMA loads ceil( \p boxDim[i] / \p elementStrides[i]) number of elements along i-th dimension.
- * To load N elements along i-th dimension, \p boxDim[i] must be set to N * \p elementStrides[i].
- *
- * - \p interleave specifies the interleaved layout of type ::CUtensorMapInterleave, which is defined as:
- * \code
- typedef enum CUtensorMapInterleave_enum {
- CU_TENSOR_MAP_INTERLEAVE_NONE = 0,
- CU_TENSOR_MAP_INTERLEAVE_16B,
- CU_TENSOR_MAP_INTERLEAVE_32B
- } CUtensorMapInterleave;
- * \endcode
- * TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16 bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16
- * uses 32 bytes.
- * When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE and \p swizzle is not ::CU_TENSOR_MAP_SWIZZLE_NONE, the bounding box inner dimension
- * (computed as \p boxDim[0] multiplied by element size derived from \p tensorDataType) must be less than or equal to the swizzle size.
- * - CU_TENSOR_MAP_SWIZZLE_32B requires the bounding box inner dimension to be <= 32.
- * - CU_TENSOR_MAP_SWIZZLE_64B requires the bounding box inner dimension to be <= 64.
- * - CU_TENSOR_MAP_SWIZZLE_128B* require the bounding box inner dimension to be <= 128.
- * Additionally, \p tensorDataType of ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B requires \p interleave to be ::CU_TENSOR_MAP_INTERLEAVE_NONE.
- *
- * - \p swizzle, which specifies the shared memory bank swizzling pattern, has to be of type ::CUtensorMapSwizzle which is defined as:
- * \code
- typedef enum CUtensorMapSwizzle_enum {
- CU_TENSOR_MAP_SWIZZLE_NONE = 0,
- CU_TENSOR_MAP_SWIZZLE_32B, // Swizzle 16B chunks within 32B span
- CU_TENSOR_MAP_SWIZZLE_64B, // Swizzle 16B chunks within 64B span
- CU_TENSOR_MAP_SWIZZLE_128B, // Swizzle 16B chunks within 128B span
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B, // Swizzle 32B chunks within 128B span
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B_FLIP_8B, // Swizzle 32B chunks within 128B span, additionally swap lower 8B with upper 8B within each 16B for every alternate row
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B // Swizzle 64B chunks within 128B span
- } CUtensorMapSwizzle;
- * \endcode
- * Data are organized in a specific order in global memory; however, this may not match the order in which the application accesses data
- * in shared memory. This difference in data organization may cause bank conflicts when shared memory is accessed. In order to avoid this
- * problem, data can be loaded to shared memory with shuffling across shared memory banks.
- * When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p swizzle must be ::CU_TENSOR_MAP_SWIZZLE_32B.
- * Other interleave modes can have any swizzling pattern.
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_NONE (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B (Store only)
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_NONE (Load only)
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load only)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load only)
- *
- * - \p l2Promotion specifies L2 fetch size which indicates the byte granularity at which L2 requests are filled from DRAM. It must be of
- * type ::CUtensorMapL2promotion, which is defined as:
- * \code
- typedef enum CUtensorMapL2promotion_enum {
- CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
- CU_TENSOR_MAP_L2_PROMOTION_L2_64B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_128B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_256B
- } CUtensorMapL2promotion;
- * \endcode
- *
- * - \p oobFill, which indicates whether zero or a special NaN constant should be used to fill out-of-bound elements, must be of type
- * ::CUtensorMapFloatOOBfill which is defined as:
- * \code
- typedef enum CUtensorMapFloatOOBfill_enum {
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0,
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
- } CUtensorMapFloatOOBfill;
- * \endcode
- * Note that ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA can only be used when \p tensorDataType represents a floating-point data type,
- * and when \p tensorDataType is not ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, and ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B.
- *
- * \param tensorMap - Tensor map object to create
- * \param tensorDataType - Tensor data type
- * \param tensorRank - Dimensionality of tensor
- * \param globalAddress - Starting address of memory region described by tensor
- * \param globalDim - Array containing tensor size (number of elements) along each of the \p tensorRank dimensions
- * \param globalStrides - Array containing stride size (in bytes) along each of the \p tensorRank - 1 dimensions
- * \param boxDim - Array containing traversal box size (number of elements) along each of the \p tensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension.
- * \param elementStrides - Array containing traversal stride in each of the \p tensorRank dimensions
- * \param interleave - Type of interleaved layout the tensor addresses
- * \param swizzle - Bank swizzling pattern inside shared memory
- * \param l2Promotion - L2 promotion size
- * \param oobFill - Indicate whether zero or special NaN constant must be used to fill out-of-bound elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTensorMapEncodeIm2col,
- * ::cuTensorMapEncodeIm2colWide,
- * ::cuTensorMapReplaceAddress
- */
- CUresult CUDAAPI cuTensorMapEncodeTiled(CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, const cuuint64_t *globalStrides, const cuuint32_t *boxDim, const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill);
- /**
- * \brief Create a tensor map descriptor object representing im2col memory region
- *
- * Creates a descriptor for Tensor Memory Access (TMA) object specified
- * by the parameters describing an im2col memory layout and returns it in \p tensorMap.
- *
- * Tensor map objects are only supported on devices of compute capability 9.0 or higher.
- * Additionally, a tensor map object is an opaque value, and, as such, should only be
- * accessed through CUDA APIs and PTX.
- *
- * The parameters passed are bound to the following requirements:
- *
- * - \p tensorMap address must be aligned to 64 bytes.
- *
- * - \p tensorDataType has to be an enum from ::CUtensorMapDataType which is defined as:
- * \code
- typedef enum CUtensorMapDataType_enum {
- CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0, // 1 byte
- CU_TENSOR_MAP_DATA_TYPE_UINT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B // 6 bits
- } CUtensorMapDataType;
- * \endcode
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B copies '16 x U4' packed values to memory aligned as 8 bytes. There are no gaps between packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B copies '16 x U4' packed values to memory aligned as 16 bytes. There are 8 byte gaps between every 8 byte chunk of packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B copies '16 x U6' packed values to memory aligned as 16 bytes. There are 4 byte gaps between every 12 byte chunk of packed values.
- *
- * - \p tensorRank, which specifies the number of tensor dimensions, must be 3, 4, or 5.
- *
- * - \p globalAddress, which specifies the starting address of the memory region described, must be 16 byte aligned. The following requirements need to also be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p globalAddress must be 32 byte aligned.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, \p globalAddress must be 32 byte aligned.
- *
- * - \p globalDim array, which specifies tensor size of each of the \p tensorRank dimensions, must be non-zero and less than or
- * equal to 2^32. Additionally, the following requirements need to be met for the packed data types:
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, globalDim[0] must be a multiple of 128.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, \p globalDim[0] must be a multiple of 2.
- * - Dimension for the packed data types must reflect the number of individual U# values.
- *
- * - \p globalStrides array, which specifies tensor stride of each of the lower \p tensorRank - 1 dimensions in bytes, must be a
- * multiple of 16 and less than 2^40. Additionally, the following requirements need to be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, the strides must be a multiple of 32.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, the strides must be a multiple of 32.
- * Each following dimension specified includes previous dimension stride:
- * \code
- globalStrides[0] = globalDim[0] * elementSizeInBytes(tensorDataType) + padding[0];
- for (i = 1; i < tensorRank - 1; i++)
- globalStrides[i] = globalStrides[i - 1] * (globalDim[i] + padding[i]);
- assert(globalStrides[i] >= globalDim[i]);
- * \endcode
- *
- * - \p pixelBoxLowerCorner array specifies the coordinate offsets {D, H, W} of the bounding box from top/left/front corner. The number of
- * offsets and their precision depend on the tensor dimensionality:
- * - When \p tensorRank is 3, one signed offset within range [-32768, 32767] is supported.
- * - When \p tensorRank is 4, two signed offsets each within range [-128, 127] are supported.
- * - When \p tensorRank is 5, three signed offsets each within range [-16, 15] are supported.
- *
- * - \p pixelBoxUpperCorner array specifies the coordinate offsets {D, H, W} of the bounding box from bottom/right/back corner. The number of
- * offsets and their precision depend on the tensor dimensionality:
- * - When \p tensorRank is 3, one signed offset within range [-32768, 32767] is supported.
- * - When \p tensorRank is 4, two signed offsets each within range [-128, 127] are supported.
- * - When \p tensorRank is 5, three signed offsets each within range [-16, 15] are supported.
- * The bounding box specified by \p pixelBoxLowerCorner and \p pixelBoxUpperCorner must have non-zero area.
- *
- * - \p channelsPerPixel, which specifies the number of elements which must be accessed along C dimension, must be less than or equal to 256.
- * Additionally, when \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, \p channelsPerPixel must be 128.
- *
- * - \p pixelsPerColumn, which specifies the number of elements that must be accessed along the {N, D, H, W} dimensions, must be less than or
- * equal to 1024.
- *
- * - \p elementStrides array, which specifies the iteration step along each of the \p tensorRank dimensions, must be non-zero and less
- * than or equal to 8. Note that when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the first element of this array is ignored since
- * TMA doesn't support the stride for dimension zero.
- * When all elements of the \p elementStrides array are one, \p boxDim specifies the number of elements to load. However, if \p elementStrides[i]
- * is not equal to one for some \p i, then TMA loads ceil( \p boxDim[i] / \p elementStrides[i]) number of elements along i-th dimension.
- * To load N elements along i-th dimension, \p boxDim[i] must be set to N * \p elementStrides[i].
- *
- * - \p interleave specifies the interleaved layout of type ::CUtensorMapInterleave, which is defined as:
- * \code
- typedef enum CUtensorMapInterleave_enum {
- CU_TENSOR_MAP_INTERLEAVE_NONE = 0,
- CU_TENSOR_MAP_INTERLEAVE_16B,
- CU_TENSOR_MAP_INTERLEAVE_32B
- } CUtensorMapInterleave;
- * \endcode
- * TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16 bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16
- * uses 32 bytes.
- * When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE and \p swizzle is not ::CU_TENSOR_MAP_SWIZZLE_NONE, the bounding box inner dimension
- * (computed as \p channelsPerPixel multiplied by element size in bytes derived from \p tensorDataType) must be less than or equal to the swizzle size.
- * - CU_TENSOR_MAP_SWIZZLE_32B requires the bounding box inner dimension to be <= 32.
- * - CU_TENSOR_MAP_SWIZZLE_64B requires the bounding box inner dimension to be <= 64.
- * - CU_TENSOR_MAP_SWIZZLE_128B* require the bounding box inner dimension to be <= 128.
- * Additionally, \p tensorDataType of ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B requires \p interleave to be ::CU_TENSOR_MAP_INTERLEAVE_NONE.
- *
- * - \p swizzle, which specifies the shared memory bank swizzling pattern, has to be of type ::CUtensorMapSwizzle which is defined as:
- * \code
- typedef enum CUtensorMapSwizzle_enum {
- CU_TENSOR_MAP_SWIZZLE_NONE = 0,
- CU_TENSOR_MAP_SWIZZLE_32B, // Swizzle 16B chunks within 32B span
- CU_TENSOR_MAP_SWIZZLE_64B, // Swizzle 16B chunks within 64B span
- CU_TENSOR_MAP_SWIZZLE_128B, // Swizzle 16B chunks within 128B span
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B, // Swizzle 32B chunks within 128B span
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B_FLIP_8B, // Swizzle 32B chunks within 128B span, additionally swap lower 8B with upper 8B within each 16B for every alternate row
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B // Swizzle 64B chunks within 128B span
- } CUtensorMapSwizzle;
- * \endcode
- * Data are organized in a specific order in global memory; however, this may not match the order in which the application accesses data
- * in shared memory. This difference in data organization may cause bank conflicts when shared memory is accessed. In order to avoid this
- * problem, data can be loaded to shared memory with shuffling across shared memory banks.
- * When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p swizzle must be ::CU_TENSOR_MAP_SWIZZLE_32B.
- * Other interleave modes can have any swizzling pattern.
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_NONE (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B (Store only)
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_NONE (Load only)
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load only)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load only)
- *
- * - \p l2Promotion specifies L2 fetch size which indicates the byte granularity at which L2 requests are filled from DRAM. It must be of
- * type ::CUtensorMapL2promotion, which is defined as:
- * \code
- typedef enum CUtensorMapL2promotion_enum {
- CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
- CU_TENSOR_MAP_L2_PROMOTION_L2_64B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_128B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_256B
- } CUtensorMapL2promotion;
- * \endcode
- *
- * - \p oobFill, which indicates whether zero or a special NaN constant should be used to fill out-of-bound elements, must be of type
- * ::CUtensorMapFloatOOBfill which is defined as:
- * \code
- typedef enum CUtensorMapFloatOOBfill_enum {
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0,
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
- } CUtensorMapFloatOOBfill;
- * \endcode
- * Note that ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA can only be used when \p tensorDataType represents a floating-point data type,
- * and when \p tensorDataType is not ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, and ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B.
- *
- * \param tensorMap - Tensor map object to create
- * \param tensorDataType - Tensor data type
- * \param tensorRank - Dimensionality of tensor; must be 3, 4, or 5
- * \param globalAddress - Starting address of memory region described by tensor
- * \param globalDim - Array containing tensor size (number of elements) along each of the \p tensorRank dimensions
- * \param globalStrides - Array containing stride size (in bytes) along each of the \p tensorRank - 1 dimensions
- * \param pixelBoxLowerCorner - Array containing DHW dimensions of lower box corner
- * \param pixelBoxUpperCorner - Array containing DHW dimensions of upper box corner
- * \param channelsPerPixel - Number of channels per pixel
- * \param pixelsPerColumn - Number of pixels per column
- * \param elementStrides - Array containing traversal stride in each of the \p tensorRank dimensions
- * \param interleave - Type of interleaved layout the tensor addresses
- * \param swizzle - Bank swizzling pattern inside shared memory
- * \param l2Promotion - L2 promotion size
- * \param oobFill - Indicate whether zero or special NaN constant will be used to fill out-of-bound elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTensorMapEncodeTiled,
- * ::cuTensorMapEncodeIm2colWide,
- * ::cuTensorMapReplaceAddress
- */
- CUresult CUDAAPI cuTensorMapEncodeIm2col(CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, const cuuint64_t *globalStrides, const int *pixelBoxLowerCorner, const int *pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill);
- /**
- * \brief Create a tensor map descriptor object representing im2col memory region, but where
- * the elements are exclusively loaded along the W dimension.
- *
- * Creates a descriptor for Tensor Memory Access (TMA) object specified by the parameters
- * describing an im2col memory layout and where the row is always loaded along the W dimension
- * and returns it in \p tensorMap. This assumes the tensor layout in memory is either NDHWC,
- * NHWC, or NWC.
- *
- * This API is only supported on devices of compute capability 10.0 or higher.
- * Additionally, a tensor map object is an opaque value, and, as such, should only be
- * accessed through CUDA APIs and PTX.
- *
- * The parameters passed are bound to the following requirements:
- *
- * - \p tensorMap address must be aligned to 64 bytes.
- *
- * - \p tensorDataType has to be an enum from ::CUtensorMapDataType which is defined as:
- * \code
- typedef enum CUtensorMapDataType_enum {
- CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0, // 1 byte
- CU_TENSOR_MAP_DATA_TYPE_UINT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_UINT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_INT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT64, // 8 bytes
- CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, // 2 bytes
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ, // 4 bytes
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, // 4 bits
- CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B // 6 bits
- } CUtensorMapDataType;
- * \endcode
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B copies '16 x U4' packed values to memory aligned as 8 bytes. There are no gaps between packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B copies '16 x U4' packed values to memory aligned as 16 bytes. There are 8 byte gaps between every 8 byte chunk of packed values.
- * ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B copies '16 x U6' packed values to memory aligned as 16 bytes. There are 4 byte gaps between every 12 byte chunk of packed values.
- *
- * - \p tensorRank, which specifies the number of tensor dimensions, must be 3, 4, or 5.
- *
- * - \p globalAddress, which specifies the starting address of the memory region described, must be 16 byte aligned. The following requirements need to also be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p globalAddress must be 32 byte aligned.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, \p globalAddress must be 32 byte aligned.
- *
- * - \p globalDim array, which specifies tensor size of each of the \p tensorRank dimensions, must be non-zero and less than or
- * equal to 2^32. Additionally, the following requirements need to be met for the packed data types:
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, globalDim[0] must be a multiple of 128.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, \p globalDim[0] must be a multiple of 2.
- * - Dimension for the packed data types must reflect the number of individual U# values.
- *
- * - \p globalStrides array, which specifies tensor stride of each of the lower \p tensorRank - 1 dimensions in bytes, must be a
- * multiple of 16 and less than 2^40. Additionally, the following requirements need to be met:
- * - When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, the strides must be a multiple of 32.
- * - When \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, the strides must be a multiple of 32.
- * Each following dimension specified includes previous dimension stride:
- * \code
- globalStrides[0] = globalDim[0] * elementSizeInBytes(tensorDataType) + padding[0];
- for (i = 1; i < tensorRank - 1; i++)
- globalStrides[i] = globalStrides[i - 1] * (globalDim[i] + padding[i]);
- assert(globalStrides[i] >= globalDim[i]);
- * \endcode
- *
- * - \p pixelBoxLowerCornerWidth specifies the coordinate offset W of the bounding box from left corner. The offset must be
- * within range [-32768, 32767].
- *
- * - \p pixelBoxUpperCornerWidth specifies the coordinate offset W of the bounding box from right corner. The offset must be
- * within range [-32768, 32767].
- *
- * The bounding box specified by \p pixelBoxLowerCornerWidth and \p pixelBoxUpperCornerWidth must have non-zero area. Note
- * that the size of the box along D and H dimensions is always equal to one.
- *
- * - \p channelsPerPixel, which specifies the number of elements which must be accessed along C dimension, must be less than or equal to 256.
- * Additionally, when \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B or ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, \p channelsPerPixel must be 128.
- *
- * - \p pixelsPerColumn, which specifies the number of elements that must be accessed along the W dimension, must be less than or
- * equal to 1024. This field is ignored when \p mode is ::CU_TENSOR_MAP_IM2COL_WIDE_MODE_W128.
- *
- * - \p elementStrides array, which specifies the iteration step along each of the \p tensorRank dimensions, must be non-zero and less
- * than or equal to 8. Note that when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the first element of this array is ignored since
- * TMA doesn't support the stride for dimension zero.
- * When all elements of the \p elementStrides array are one, \p boxDim specifies the number of elements to load. However, if \p elementStrides[i]
- * is not equal to one for some \p i, then TMA loads ceil( \p boxDim[i] / \p elementStrides[i]) number of elements along i-th dimension.
- * To load N elements along i-th dimension, \p boxDim[i] must be set to N * \p elementStrides[i].
- *
- * - \p interleave specifies the interleaved layout of type ::CUtensorMapInterleave, which is defined as:
- * \code
- typedef enum CUtensorMapInterleave_enum {
- CU_TENSOR_MAP_INTERLEAVE_NONE = 0,
- CU_TENSOR_MAP_INTERLEAVE_16B,
- CU_TENSOR_MAP_INTERLEAVE_32B
- } CUtensorMapInterleave;
- * \endcode
- * TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16 bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16
- * uses 32 bytes.
- * When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the bounding box inner dimension (computed as \p channelsPerPixel multiplied by
- * element size in bytes derived from \p tensorDataType) must be less than or equal to the swizzle size.
- * - CU_TENSOR_MAP_SWIZZLE_64B requires the bounding box inner dimension to be <= 64.
- * - CU_TENSOR_MAP_SWIZZLE_128B* require the bounding box inner dimension to be <= 128.
- * Additionally, \p tensorDataType of ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B requires \p interleave to be ::CU_TENSOR_MAP_INTERLEAVE_NONE.
- *
- * - \p mode, which describes how elements are loaded along the W dimension, has to be one of the following ::CUtensorMapIm2ColWideMode types:
- * \code
- * CU_TENSOR_MAP_IM2COL_WIDE_MODE_W,
- * CU_TENSOR_MAP_IM2COL_WIDE_MODE_W128
- * \endcode
- * ::CU_TENSOR_MAP_IM2COL_WIDE_MODE_W allows the number of elements loaded along the W dimension to be specified
- * via the \p pixelsPerColumn field.
- *
- * - \p swizzle, which specifies the shared memory bank swizzling pattern, must be one of the following
- * ::CUtensorMapSwizzle modes (other swizzle modes are not supported):
- * \code
- typedef enum CUtensorMapSwizzle_enum {
- CU_TENSOR_MAP_SWIZZLE_64B, // Swizzle 16B chunks within 64B span
- CU_TENSOR_MAP_SWIZZLE_128B, // Swizzle 16B chunks within 128B span
- CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B, // Swizzle 32B chunks within 128B span
- } CUtensorMapSwizzle;
- * \endcode
- * Data are organized in a specific order in global memory; however, this may not match the order in which the application accesses data
- * in shared memory. This difference in data organization may cause bank conflicts when shared memory is accessed. In order to avoid this
- * problem, data can be loaded to shared memory with shuffling across shared memory banks.
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load & Store)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load & Store)
- * When the \p tensorDataType is ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, only the following swizzle modes are supported:
- * - CU_TENSOR_MAP_SWIZZLE_128B (Load only)
- * - CU_TENSOR_MAP_SWIZZLE_128B_ATOM_32B (Load only)
- *
- * - \p l2Promotion specifies L2 fetch size which indicates the byte granularity at which L2 requests are filled from DRAM. It must be of
- * type ::CUtensorMapL2promotion, which is defined as:
- * \code
- typedef enum CUtensorMapL2promotion_enum {
- CU_TENSOR_MAP_L2_PROMOTION_NONE = 0,
- CU_TENSOR_MAP_L2_PROMOTION_L2_64B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_128B,
- CU_TENSOR_MAP_L2_PROMOTION_L2_256B
- } CUtensorMapL2promotion;
- * \endcode
- *
- * - \p oobFill, which indicates whether zero or a special NaN constant should be used to fill out-of-bound elements, must be of type
- * ::CUtensorMapFloatOOBfill which is defined as:
- * \code
- typedef enum CUtensorMapFloatOOBfill_enum {
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0,
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
- } CUtensorMapFloatOOBfill;
- * \endcode
- * Note that ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA can only be used when \p tensorDataType represents a floating-point data type,
- * and when \p tensorDataType is not ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B, ::CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B, and ::CU_TENSOR_MAP_DATA_TYPE_16U6_ALIGN16B.
- *
- * \param tensorMap - Tensor map object to create
- * \param tensorDataType - Tensor data type
- * \param tensorRank - Dimensionality of tensor; must be 3, 4, or 5
- * \param globalAddress - Starting address of memory region described by tensor
- * \param globalDim - Array containing tensor size (number of elements) along each of the \p tensorRank dimensions
- * \param globalStrides - Array containing stride size (in bytes) along each of the \p tensorRank - 1 dimensions
- * \param pixelBoxLowerCornerWidth - Width offset of left box corner
- * \param pixelBoxUpperCornerWidth - Width offset of right box corner
- * \param channelsPerPixel - Number of channels per pixel
- * \param pixelsPerColumn - Number of pixels per column
- * \param elementStrides - Array containing traversal stride in each of the \p tensorRank dimensions
- * \param interleave - Type of interleaved layout the tensor addresses
- * \param mode - W or W128 mode
- * \param swizzle - Bank swizzling pattern inside shared memory
- * \param l2Promotion - L2 promotion size
- * \param oobFill - Indicate whether zero or special NaN constant will be used to fill out-of-bound elements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTensorMapEncodeTiled,
- * ::cuTensorMapEncodeIm2col,
- * ::cuTensorMapReplaceAddress
- */
- CUresult CUDAAPI cuTensorMapEncodeIm2colWide(CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, const cuuint64_t *globalStrides, int pixelBoxLowerCornerWidth, int pixelBoxUpperCornerWidth, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, CUtensorMapIm2ColWideMode mode, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill);
- /**
- * \brief Modify an existing tensor map descriptor with an updated global address
- *
- * Modifies the descriptor for Tensor Memory Access (TMA) object passed in \p tensorMap with
- * an updated \p globalAddress.
- *
- * Tensor map objects are only supported on devices of compute capability 9.0 or higher.
- * Additionally, a tensor map object is an opaque value, and, as such, should only be
- * accessed through CUDA API calls.
- *
- * \param tensorMap - Tensor map object to modify
- * \param globalAddress - Starting address of memory region described by tensor, must follow previous alignment requirements
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuTensorMapEncodeTiled,
- * ::cuTensorMapEncodeIm2col,
- * ::cuTensorMapEncodeIm2colWide
- */
- CUresult CUDAAPI cuTensorMapReplaceAddress(CUtensorMap *tensorMap, void *globalAddress);
- /** @} */
- /* END CUDA_TENSOR_MEMORY */
- /**
- * \defgroup CUDA_PEER_ACCESS Peer Context Memory Access
- *
- * ___MANBRIEF___ direct peer context memory access functions of the low-level
- * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the direct peer context memory access functions
- * of the low-level CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Queries if a device may directly access a peer device's memory.
- *
- * Returns in \p *canAccessPeer a value of 1 if contexts on \p dev are capable of
- * directly accessing memory from contexts on \p peerDev and 0 otherwise.
- * If direct access of \p peerDev from \p dev is possible, then access may be
- * enabled on two specific contexts by calling ::cuCtxEnablePeerAccess().
- *
- * \param canAccessPeer - Returned access capability
- * \param dev - Device from which allocations on \p peerDev are to
- * be directly accessed.
- * \param peerDev - Device on which the allocations to be directly accessed
- * by \p dev reside.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE
- * \notefnerr
- *
- * \sa
- * ::cuCtxEnablePeerAccess,
- * ::cuCtxDisablePeerAccess,
- * ::cudaDeviceCanAccessPeer
- */
- CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, CUdevice peerDev);
- /**
- * \brief Enables direct access to memory allocations in a peer context.
- *
- * If both the current context and \p peerContext are on devices which support unified
- * addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same
- * major compute capability, then on success all allocations from \p peerContext will
- * immediately be accessible by the current context. See \ref CUDA_UNIFIED for additional
- * details.
- *
- * Note that access granted by this call is unidirectional and that in order to access
- * memory from the current context in \p peerContext, a separate symmetric call
- * to ::cuCtxEnablePeerAccess() is required.
- *
- * Note that there are both device-wide and system-wide limitations per system
- * configuration, as noted in the CUDA Programming Guide under the section
- * "Peer-to-Peer Memory Access".
- *
- * Returns ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED if ::cuDeviceCanAccessPeer() indicates
- * that the ::CUdevice of the current context cannot directly access memory
- * from the ::CUdevice of \p peerContext.
- *
- * Returns ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED if direct access of
- * \p peerContext from the current context has already been enabled.
- *
- * Returns ::CUDA_ERROR_TOO_MANY_PEERS if direct peer access is not possible
- * because hardware resources required for peer access have been exhausted.
- *
- * Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, \p peerContext
- * is not a valid context, or if the current context is \p peerContext.
- *
- * Returns ::CUDA_ERROR_INVALID_VALUE if \p Flags is not 0.
- *
- * \param peerContext - Peer context to enable direct access to from the current context
- * \param Flags - Reserved for future use and must be set to 0
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED,
- * ::CUDA_ERROR_TOO_MANY_PEERS,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuDeviceCanAccessPeer,
- * ::cuCtxDisablePeerAccess,
- * ::cudaDeviceEnablePeerAccess
- */
- CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
- /**
- * \brief Disables direct access to memory allocations in a peer context and
- * unregisters any registered allocations.
- *
- * Returns ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has
- * not yet been enabled from \p peerContext to the current context.
- *
- * Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, or if
- * \p peerContext is not a valid context.
- *
- * \param peerContext - Peer context to disable direct access to
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED,
- * ::CUDA_ERROR_INVALID_CONTEXT
- * \notefnerr
- *
- * \sa
- * ::cuDeviceCanAccessPeer,
- * ::cuCtxEnablePeerAccess,
- * ::cudaDeviceDisablePeerAccess
- */
- CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext);
- /**
- * \brief Queries attributes of the link between two devices.
- *
- * Returns in \p *value the value of the requested attribute \p attrib of the
- * link between \p srcDevice and \p dstDevice. The supported attributes are:
- * - ::CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: A relative value indicating the
- * performance of the link between two devices.
- * - ::CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED: 1 if P2P access is enabled.
- * - ::CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: 1 if Atomic operations over
- * the link are supported.
- * - ::CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED: 1 if cudaArray can
- * be accessed over the link.
- *
- * Returns ::CUDA_ERROR_INVALID_DEVICE if \p srcDevice or \p dstDevice are not valid
- * or if they represent the same device.
- *
- * Returns ::CUDA_ERROR_INVALID_VALUE if \p attrib is not valid or if \p value is
- * a null pointer.
- *
- * \param value - Returned value of the requested attribute
- * \param attrib - The requested attribute of the link between \p srcDevice and \p dstDevice.
- * \param srcDevice - The source device of the target link.
- * \param dstDevice - The destination device of the target link.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE
- * \notefnerr
- *
- * \sa
- * ::cuCtxEnablePeerAccess,
- * ::cuCtxDisablePeerAccess,
- * ::cuDeviceCanAccessPeer,
- * ::cudaDeviceGetP2PAttribute
- */
- CUresult CUDAAPI cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
- /** @} */ /* END CUDA_PEER_ACCESS */
- /**
- * \defgroup CUDA_GRAPHICS Graphics Interoperability
- *
- * ___MANBRIEF___ graphics interoperability functions of the low-level CUDA
- * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the graphics interoperability functions of the
- * low-level CUDA driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Unregisters a graphics resource for access by CUDA
- *
- * Unregisters the graphics resource \p resource so it is not accessible by
- * CUDA unless registered again.
- *
- * If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is
- * returned.
- *
- * \param resource - Resource to unregister
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_UNKNOWN
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsD3D9RegisterResource,
- * ::cuGraphicsD3D10RegisterResource,
- * ::cuGraphicsD3D11RegisterResource,
- * ::cuGraphicsGLRegisterBuffer,
- * ::cuGraphicsGLRegisterImage,
- * ::cudaGraphicsUnregisterResource
- */
- CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource);
- /**
- * \brief Get an array through which to access a subresource of a mapped graphics resource.
- *
- * Returns in \p *pArray an array through which the subresource of the mapped
- * graphics resource \p resource which corresponds to array index \p arrayIndex
- * and mipmap level \p mipLevel may be accessed. The value set in \p *pArray may
- * change every time that \p resource is mapped.
- *
- * If \p resource is not a texture then it cannot be accessed via an array and
- * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned.
- * If \p arrayIndex is not a valid array index for \p resource then
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- * If \p mipLevel is not a valid mipmap level for \p resource then
- * ::CUDA_ERROR_INVALID_VALUE is returned.
- * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
- *
- * \param pArray - Returned array through which a subresource of \p resource may be accessed
- * \param resource - Mapped resource to access
- * \param arrayIndex - Array index for array textures or cubemap face
- * index as defined by ::CUarray_cubemap_face for
- * cubemap textures for the subresource to access
- * \param mipLevel - Mipmap level for the subresource to access
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_MAPPED,
- * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsResourceGetMappedPointer,
- * ::cudaGraphicsSubResourceGetMappedArray
- */
- CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
- /**
- * \brief Get a mipmapped array through which to access a mapped graphics resource.
- *
- * Returns in \p *pMipmappedArray a mipmapped array through which the mapped graphics
- * resource \p resource may be accessed. The value set in \p *pMipmappedArray may change
- * every time that \p resource is mapped.
- *
- * If \p resource is not a texture then it cannot be accessed via a mipmapped array and
- * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned.
- * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
- *
- * \param pMipmappedArray - Returned mipmapped array through which \p resource may be accessed
- * \param resource - Mapped resource to access
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_MAPPED,
- * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsResourceGetMappedPointer,
- * ::cudaGraphicsResourceGetMappedMipmappedArray
- */
- CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource);
- /**
- * \brief Get a device pointer through which to access a mapped graphics resource.
- *
- * Returns in \p *pDevPtr a pointer through which the mapped graphics resource
- * \p resource may be accessed.
- * Returns in \p *pSize the size of the memory in bytes which may be accessed from that pointer.
- * The value set in \p *pDevPtr may change every time that \p resource is mapped.
- *
- * If \p resource is not a buffer then it cannot be accessed via a pointer and
- * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned.
- * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
- *
- * \param pDevPtr - Returned pointer through which \p resource may be accessed
- * \param pSize - Returned size of the buffer accessible starting at \p *pDevPtr
- * \param resource - Mapped resource to access
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_MAPPED,
- * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsMapResources,
- * ::cuGraphicsSubResourceGetMappedArray,
- * ::cudaGraphicsResourceGetMappedPointer
- */
- CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
- /**
- * \brief Set usage flags for mapping a graphics resource
- *
- * Set \p flags for mapping the graphics resource \p resource.
- *
- * Changes to \p flags will take effect the next time \p resource is mapped.
- * The \p flags argument may be any of the following:
- * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
- * resource will be used. It is therefore assumed that this resource will be
- * read from and written to by CUDA kernels. This is the default value.
- * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA kernels which
- * access this resource will not write to this resource.
- * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA kernels
- * which access this resource will not read from this resource and will
- * write over the entire contents of the resource, so none of the data
- * previously stored in the resource will be preserved.
- *
- * If \p resource is presently mapped for access by CUDA then
- * ::CUDA_ERROR_ALREADY_MAPPED is returned.
- * If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VALUE is returned.
- *
- * \param resource - Registered resource to set flags for
- * \param flags - Parameters for resource mapping
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_ALREADY_MAPPED
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsMapResources,
- * ::cudaGraphicsResourceSetMapFlags
- */
- CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
- /**
- * \brief Map graphics resources for access by CUDA
- *
- * Maps the \p count graphics resources in \p resources for access by CUDA.
- *
- * The resources in \p resources may be accessed by CUDA until they
- * are unmapped. The graphics API from which \p resources were registered
- * should not access any resources while they are mapped by CUDA. If an
- * application does so, the results are undefined.
- *
- * This function provides the synchronization guarantee that any graphics calls
- * issued before ::cuGraphicsMapResources() will complete before any subsequent CUDA
- * work issued in \p hStream begins.
- *
- * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
- * If any of \p resources are presently mapped for access by CUDA then ::CUDA_ERROR_ALREADY_MAPPED is returned.
- *
- * \param count - Number of resources to map
- * \param resources - Resources to map for CUDA usage
- * \param hStream - Stream with which to synchronize
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_ALREADY_MAPPED,
- * ::CUDA_ERROR_UNKNOWN
- * \note_null_stream
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsResourceGetMappedPointer,
- * ::cuGraphicsSubResourceGetMappedArray,
- * ::cuGraphicsUnmapResources,
- * ::cudaGraphicsMapResources
- */
- CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
- /**
- * \brief Unmap graphics resources.
- *
- * Unmaps the \p count graphics resources in \p resources.
- *
- * Once unmapped, the resources in \p resources may not be accessed by CUDA
- * until they are mapped again.
- *
- * This function provides the synchronization guarantee that any CUDA work issued
- * in \p hStream before ::cuGraphicsUnmapResources() will complete before any
- * subsequently issued graphics work begins.
- *
- *
- * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
- * If any of \p resources are not presently mapped for access by CUDA then ::CUDA_ERROR_NOT_MAPPED is returned.
- *
- * \param count - Number of resources to unmap
- * \param resources - Resources to unmap
- * \param hStream - Stream with which to synchronize
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_NOT_MAPPED,
- * ::CUDA_ERROR_UNKNOWN
- * \note_null_stream
- * \notefnerr
- *
- * \sa
- * ::cuGraphicsMapResources,
- * ::cudaGraphicsUnmapResources
- */
- CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
- /** @} */ /* END CUDA_GRAPHICS */
- /**
- * \defgroup CUDA_DRIVER_ENTRY_POINT Driver Entry Point Access
- *
- * ___MANBRIEF___ driver entry point access functions of the low-level CUDA driver API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the driver entry point access functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * \brief Returns the requested driver API function pointer
- *
- * Returns in \p *pfn the address of the CUDA driver function for the requested
- * CUDA version and flags.
- *
- * The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2
- * should be specified as 11020. For a requested driver symbol, if the specified
- * CUDA version is greater than or equal to the CUDA version in which the driver symbol
- * was introduced, this API will return the function pointer to the corresponding
- * versioned function.
- *
- * The pointer returned by the API should be cast to a function pointer matching the
- * requested driver function's definition in the API header file. The function pointer
- * typedef can be picked up from the corresponding typedefs header file. For example,
- * cudaTypedefs.h consists of function pointer typedefs for driver APIs defined in cuda.h.
- *
- * The API will return ::CUDA_SUCCESS and set the returned \p pfn to NULL if the
- * requested driver function is not supported on the platform, no ABI
- * compatible driver function exists for the specified \p cudaVersion or if the
- * driver symbol is invalid.
- *
- * It will also set the optional \p symbolStatus to one of the values in
- * ::CUdriverProcAddressQueryResult with the following meanings:
- * - ::CU_GET_PROC_ADDRESS_SUCCESS - The requested symbol was successfully found based
- * on input arguments and \p pfn is valid
- * - ::CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND - The requested symbol was not found
- * - ::CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT - The requested symbol was found but is
- * not supported by cudaVersion specified
- *
- * The requested flags can be:
- * - ::CU_GET_PROC_ADDRESS_DEFAULT: This is the default mode. This is equivalent to
- * ::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM if the code is compiled with
- * --default-stream per-thread compilation flag or the macro CUDA_API_PER_THREAD_DEFAULT_STREAM
- * is defined; ::CU_GET_PROC_ADDRESS_LEGACY_STREAM otherwise.
- * - ::CU_GET_PROC_ADDRESS_LEGACY_STREAM: This will enable the search for all driver symbols
- * that match the requested driver symbol name except the corresponding per-thread versions.
- * - ::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM: This will enable the search for all
- * driver symbols that match the requested driver symbol name including the per-thread
- * versions. If a per-thread version is not found, the API will return the legacy version
- * of the driver function.
- *
- * \param symbol - The base name of the driver API function to look for. As an example,
- * for the driver API ::cuMemAlloc_v2, \p symbol would be cuMemAlloc and
- * \p cudaVersion would be the ABI compatible CUDA version for the _v2 variant.
- * \param pfn - Location to return the function pointer to the requested driver function
- * \param cudaVersion - The CUDA version to look for the requested driver symbol
- * \param flags - Flags to specify search options.
- * \param symbolStatus - Optional location to store the status of the search for
- * \p symbol based on \p cudaVersion. See ::CUdriverProcAddressQueryResult
- * for possible values.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- * \note_version_mixing
- *
- * \sa
- * ::cudaGetDriverEntryPoint
- */
- CUresult CUDAAPI cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus);
- /** @} */ /* END CUDA_DRIVER_ENTRY_POINT */
- /**
- * \defgroup CUDA_COREDUMP Coredump Attributes Control API
- *
- * ___MANBRIEF___ coredump attribute control functions for the low-level CUDA API
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the coredump attribute control functions of the low-level CUDA
- * driver application programming interface.
- *
- * @{
- */
- /**
- * Flags for choosing a coredump attribute to get/set
- *
- * Each enumerator names one setting that can be passed as the \p attrib
- * argument of the coredump attribute get/set functions in this group.
- */
- typedef enum CUcoredumpSettings_enum {
- CU_COREDUMP_ENABLE_ON_EXCEPTION = 1, /**< Bool: GPU exceptions create a coredump at the ::CU_COREDUMP_FILE location */
- CU_COREDUMP_TRIGGER_HOST, /**< Bool: the host CPU also creates a coredump (deprecated as of CUDA 12.5; see ::CU_COREDUMP_SKIP_ABORT) */
- CU_COREDUMP_LIGHTWEIGHT, /**< Bool: coredumps omit GPU memory and non-reloc ELF images (deprecated as of CUDA 12.5; see ::CU_COREDUMP_GENERATION_FLAGS) */
- CU_COREDUMP_ENABLE_USER_TRIGGER, /**< Bool: a coredump can be created by writing to the pipe named by ::CU_COREDUMP_PIPE */
- CU_COREDUMP_FILE, /**< String of up to 1023 characters: location where coredumps are written */
- CU_COREDUMP_PIPE, /**< String of up to 1023 characters: name of the pipe monitored for user-triggered coredumps */
- CU_COREDUMP_GENERATION_FLAGS, /**< Integer: bitwise OR of ::CUCoredumpGenerationFlags values controlling coredump contents */
- CU_COREDUMP_MAX /**< NOTE(review): presumably an end-of-enum sentinel rather than a settable attribute - confirm */
- } CUcoredumpSettings;
- /**
- * Flags for controlling coredump contents
- *
- * Values are single-bit masks intended to be combined with bitwise OR and
- * supplied as the ::CU_COREDUMP_GENERATION_FLAGS attribute.
- */
- typedef enum CUCoredumpGenerationFlags {
- CU_COREDUMP_DEFAULT_FLAGS = 0, /**< No skip bits set: default generation, including all accessible memory regions */
- CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = (1 << 0), /**< Omit data from CUDA source modules that are not relocated at runtime */
- CU_COREDUMP_SKIP_GLOBAL_MEMORY = (1 << 1), /**< Omit device-side global data that does not belong to any context */
- CU_COREDUMP_SKIP_SHARED_MEMORY = (1 << 2), /**< Omit shared memory from the coredump */
- CU_COREDUMP_SKIP_LOCAL_MEMORY = (1 << 3), /**< Omit local memory from the kernel */
- CU_COREDUMP_SKIP_ABORT = (1 << 4), /**< GPU exceptions will not raise an abort() in the host CPU process */
- CU_COREDUMP_SKIP_CONSTBANK_MEMORY = (1 << 5), /**< NOTE(review): presumably omits constant-bank memory; not described in the attribute docs - confirm */
- /** Union of all the memory/ELF skip bits above; ::CU_COREDUMP_SKIP_ABORT is deliberately not part of this mask */
- CU_COREDUMP_LIGHTWEIGHT_FLAGS = CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES
- | CU_COREDUMP_SKIP_GLOBAL_MEMORY
- | CU_COREDUMP_SKIP_SHARED_MEMORY
- | CU_COREDUMP_SKIP_LOCAL_MEMORY
- | CU_COREDUMP_SKIP_CONSTBANK_MEMORY
- } CUCoredumpGenerationFlags;
- /**
- * \brief Allows caller to fetch a coredump attribute value for the current context
- *
- * Returns in \p *value the requested value specified by \p attrib. It is up to the caller
- * to ensure that the data type and size of \p *value matches the request.
- *
- * If the caller calls this function with \p value equal to NULL, the size of the memory
- * region (in bytes) expected for \p attrib will be placed in \p size.
- *
- * The supported attributes are:
- * - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from
- * this context will create a coredump at the location specified by ::CU_COREDUMP_FILE.
- * The default value is ::false unless set to ::true globally or locally, or the
- * CU_CTX_USER_COREDUMP_ENABLE flag was set during context creation.
- * - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will
- * also create a coredump. The default value is ::true unless set to ::false globally or
- * locally. This value is deprecated as of CUDA 12.5 - raise the ::CU_COREDUMP_SKIP_ABORT
- * flag to disable host device abort() if needed.
- * - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps
- * will not have a dump of GPU memory or non-reloc ELF images. The default value is
- * ::false unless set to ::true globally or locally. This attribute is deprecated as
- * of CUDA 12.5, please use ::CU_COREDUMP_GENERATION_FLAGS instead.
- * - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be
- * created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default
- * value is ::false unless set to ::true globally or locally.
- * - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where
- * any coredumps generated by this context will be written. The default value is
- * ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA applications and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe
- * that will be monitored if user-triggered coredumps are enabled. The default value is
- * ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA application and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_GENERATION_FLAGS: An integer with values to allow granular control of the data
- * contained in a coredump specified as a bitwise OR combination of the following values:
- * + ::CU_COREDUMP_DEFAULT_FLAGS - if set by itself, coredump generation returns to its
- * default settings of including all memory regions that it is able to access
- * + ::CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES - Coredump will not include the data from
- * CUDA source modules that are not relocated at runtime.
- * + ::CU_COREDUMP_SKIP_GLOBAL_MEMORY - Coredump will not include device-side global data
- * that does not belong to any context.
- * + ::CU_COREDUMP_SKIP_SHARED_MEMORY - Coredump will not include grid-scale shared memory
- * for the warp that the dumped kernel belonged to.
- * + ::CU_COREDUMP_SKIP_LOCAL_MEMORY - Coredump will not include local memory from the kernel.
- * + ::CU_COREDUMP_LIGHTWEIGHT_FLAGS - Enables all of the above options. Equivalent to setting
- * the ::CU_COREDUMP_LIGHTWEIGHT attribute to ::true.
- * + ::CU_COREDUMP_SKIP_ABORT - If set, GPU exceptions will not raise an abort() in the host CPU
- * process. Same functional goal as ::CU_COREDUMP_TRIGGER_HOST but better reflects the default
- * behavior.
- *
- * \param attrib - The enum defining which value to fetch.
- * \param value - void* containing the requested data.
- * \param size - The size of the memory region \p value points to.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- *
- * \sa
- * ::cuCoredumpGetAttributeGlobal,
- * ::cuCoredumpSetAttribute,
- * ::cuCoredumpSetAttributeGlobal
- */
- CUresult CUDAAPI cuCoredumpGetAttribute(CUcoredumpSettings attrib, void* value, size_t *size);
- /**
- * \brief Allows caller to fetch a coredump attribute value for the entire application
- *
- * Returns in \p *value the requested value specified by \p attrib. It is up to the caller
- * to ensure that the data type and size of \p *value matches the request.
- *
- * If the caller calls this function with \p value equal to NULL, the size of the memory
- * region (in bytes) expected for \p attrib will be placed in \p size.
- *
- * The supported attributes are:
- * - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from
- * this context will create a coredump at the location specified by ::CU_COREDUMP_FILE.
- * The default value is ::false.
- * - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will
- * also create a coredump. The default value is ::true unless set to ::false globally or
- * locally. This value is deprecated as of CUDA 12.5 - raise the ::CU_COREDUMP_SKIP_ABORT
- * flag to disable host device abort() if needed.
- * - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps
- * will not have a dump of GPU memory or non-reloc ELF images. The default value is
- * ::false. This attribute is deprecated as of CUDA 12.5, please use ::CU_COREDUMP_GENERATION_FLAGS
- * instead.
- * - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be
- * created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default
- * value is ::false.
- * - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where
- * any coredumps generated by this context will be written. The default value is
- * ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA applications and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe
- * that will be monitored if user-triggered coredumps are enabled. The default value is
- * ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA application and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_GENERATION_FLAGS: An integer with values to allow granular control of the data
- * contained in a coredump specified as a bitwise OR combination of the following values:
- * + ::CU_COREDUMP_DEFAULT_FLAGS - if set by itself, coredump generation returns to its
- * default settings of including all memory regions that it is able to access
- * + ::CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES - Coredump will not include the data from
- * CUDA source modules that are not relocated at runtime.
- * + ::CU_COREDUMP_SKIP_GLOBAL_MEMORY - Coredump will not include device-side global data
- * that does not belong to any context.
- * + ::CU_COREDUMP_SKIP_SHARED_MEMORY - Coredump will not include grid-scale shared memory
- * for the warp that the dumped kernel belonged to.
- * + ::CU_COREDUMP_SKIP_LOCAL_MEMORY - Coredump will not include local memory from the kernel.
- * + ::CU_COREDUMP_LIGHTWEIGHT_FLAGS - Enables all of the above options. Equivalent to setting
- * the ::CU_COREDUMP_LIGHTWEIGHT attribute to ::true.
- * + ::CU_COREDUMP_SKIP_ABORT - If set, GPU exceptions will not raise an abort() in the host CPU
- * process. Same functional goal as ::CU_COREDUMP_TRIGGER_HOST but better reflects the default
- * behavior.
- *
- * \param attrib - The enum defining which value to fetch.
- * \param value - void* containing the requested data.
- * \param size - The size of the memory region \p value points to.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuCoredumpGetAttribute,
- * ::cuCoredumpSetAttribute,
- * ::cuCoredumpSetAttributeGlobal
- */
- CUresult CUDAAPI cuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void *value, size_t *size);
- /**
- * \brief Allows caller to set a coredump attribute value for the current context
- *
- * This function should be considered an alternate interface to the CUDA-GDB environment
- * variables defined in this document: https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump
- *
- * An important design decision to note is that any coredump environment variable values
- * set before CUDA initializes will take permanent precedence over any values set with this
- * function. This decision was made to ensure no change in behavior for any users that
- * may be currently using these variables to get coredumps.
- *
- * \p *value shall contain the requested value specified by \p attrib. It is up to the caller
- * to ensure that the data type and size of \p *value matches the request.
- *
- * If the caller calls this function with \p value equal to NULL, the size of the memory
- * region (in bytes) expected for \p attrib will be placed in \p size.
- *
- * \note This function will return ::CUDA_ERROR_NOT_SUPPORTED if the caller attempts to set
- * ::CU_COREDUMP_ENABLE_ON_EXCEPTION on a GPU with Compute Capability < 6.0. ::cuCoredumpSetAttributeGlobal
- * works on those platforms as an alternative.
- *
- * \note ::CU_COREDUMP_ENABLE_USER_TRIGGER and ::CU_COREDUMP_PIPE cannot be set on a per-context basis.
- *
- * The supported attributes are:
- * - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from
- * this context will create a coredump at the location specified by ::CU_COREDUMP_FILE.
- * The default value is ::false.
- * - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will
- * also create a coredump. The default value is ::true unless set to ::false globally or
- * locally. This value is deprecated as of CUDA 12.5 - raise the ::CU_COREDUMP_SKIP_ABORT
- * flag to disable host device abort() if needed.
- * - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps
- * will not have a dump of GPU memory or non-reloc ELF images. The default value is
- * ::false. This attribute is deprecated as of CUDA 12.5, please use ::CU_COREDUMP_GENERATION_FLAGS
- * instead.
- * - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where
- * any coredumps generated by this context will be written. The default value is
- * ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA applications and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_GENERATION_FLAGS: An integer with values to allow granular control of the data
- * contained in a coredump specified as a bitwise OR combination of the following values:
- * + ::CU_COREDUMP_DEFAULT_FLAGS - if set by itself, coredump generation returns to its
- * default settings of including all memory regions that it is able to access
- * + ::CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES - Coredump will not include the data from
- * CUDA source modules that are not relocated at runtime.
- * + ::CU_COREDUMP_SKIP_GLOBAL_MEMORY - Coredump will not include device-side global data
- * that does not belong to any context.
- * + ::CU_COREDUMP_SKIP_SHARED_MEMORY - Coredump will not include grid-scale shared memory
- * for the warp that the dumped kernel belonged to.
- * + ::CU_COREDUMP_SKIP_LOCAL_MEMORY - Coredump will not include local memory from the kernel.
- * + ::CU_COREDUMP_LIGHTWEIGHT_FLAGS - Enables all of the above options. Equivalent to setting
- * the ::CU_COREDUMP_LIGHTWEIGHT attribute to ::true.
- * + ::CU_COREDUMP_SKIP_ABORT - If set, GPU exceptions will not raise an abort() in the host CPU
- * process. Same functional goal as ::CU_COREDUMP_TRIGGER_HOST but better reflects the default
- * behavior.
- *
- * \param attrib - The enum defining which value to set.
- * \param value - void* containing the requested data.
- * \param size - The size of the memory region \p value points to.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuCoredumpGetAttributeGlobal,
- * ::cuCoredumpGetAttribute,
- * ::cuCoredumpSetAttributeGlobal
- */
- CUresult CUDAAPI cuCoredumpSetAttribute(CUcoredumpSettings attrib, void* value, size_t *size);
- /**
- * \brief Allows caller to set a coredump attribute value globally
- *
- * This function should be considered an alternate interface to the CUDA-GDB environment
- * variables defined in this document: https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump
- *
- * An important design decision to note is that any coredump environment variable values
- * set before CUDA initializes will take permanent precedence over any values set with this
- * function. This decision was made to ensure no change in behavior for any users that
- * may be currently using these variables to get coredumps.
- *
- * \p *value shall contain the requested value specified by \p attrib. It is up to the caller
- * to ensure that the data type and size of \p *value matches the request.
- *
- * If the caller calls this function with \p *value equal to NULL, the size of the memory
- * region (in bytes) expected for \p attrib will be placed in \p size.
- *
- * The supported attributes are:
- * - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from
- * this context will create a coredump at the location specified by ::CU_COREDUMP_FILE.
- * The default value is ::false.
- * - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will
- * also create a coredump. The default value is ::true unless set to ::false globally
- * or locally. This value is deprecated as of CUDA 12.5 - raise the ::CU_COREDUMP_SKIP_ABORT
- * flag to disable host device abort() if needed.
- * - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps
- * will not have a dump of GPU memory or non-reloc ELF images. The default value is
- * ::false. This attribute is deprecated as of CUDA 12.5, please use ::CU_COREDUMP_GENERATION_FLAGS
- * instead.
- * - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be
- * created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default
- * value is ::false.
- * - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where
- * any coredumps generated by this context will be written. The default value is
- * ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running
- * the CUDA applications and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe
- * that will be monitored if user-triggered coredumps are enabled. This value may not be
- * changed after ::CU_COREDUMP_ENABLE_USER_TRIGGER is set to ::true. The default
- * value is ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine
- * running the CUDA application and ::PID is the process ID of the CUDA application.
- * - ::CU_COREDUMP_GENERATION_FLAGS: An integer with values to allow granular control of the data
- * contained in a coredump specified as a bitwise OR combination of the following values:
- * + ::CU_COREDUMP_DEFAULT_FLAGS - if set by itself, coredump generation returns to its
- * default settings of including all memory regions that it is able to access
- * + ::CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES - Coredump will not include the data from
- * CUDA source modules that are not relocated at runtime.
- * + ::CU_COREDUMP_SKIP_GLOBAL_MEMORY - Coredump will not include device-side global data
- * that does not belong to any context.
- * + ::CU_COREDUMP_SKIP_SHARED_MEMORY - Coredump will not include grid-scale shared memory
- * for the warp that the dumped kernel belonged to.
- * + ::CU_COREDUMP_SKIP_LOCAL_MEMORY - Coredump will not include local memory from the kernel.
- * + ::CU_COREDUMP_LIGHTWEIGHT_FLAGS - Enables all of the above options. Equivalent to setting
- * the ::CU_COREDUMP_LIGHTWEIGHT attribute to ::true.
- * + ::CU_COREDUMP_SKIP_ABORT - If set, GPU exceptions will not raise an abort() in the host CPU
- * process. Same functional goal as ::CU_COREDUMP_TRIGGER_HOST but better reflects the default
- * behavior.
- *
- * \param attrib - The enum defining which value to set.
- * \param value - void* containing the requested data.
- * \param size - The size of the memory region \p value points to.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_PERMITTED
- *
- * \sa
- * ::cuCoredumpGetAttribute,
- * ::cuCoredumpGetAttributeGlobal,
- * ::cuCoredumpSetAttribute
- */
- CUresult CUDAAPI cuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void *value, size_t *size);
- /** @} */ /* END CUDA_COREDUMP */
- CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
- /*
- ** ******************* GREEN CONTEXTS **********************
- */
- /**
- * \defgroup CUDA_GREEN_CONTEXTS Green Contexts
- *
- * ___MANBRIEF___ Driver level API for creation and manipulation of green contexts
- * (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the APIs for creation and manipulation of green contexts in the CUDA
- * driver. Green contexts are a lightweight alternative to traditional contexts, with the ability
- * to pass in a set of resources that they should be initialized with. This allows the developer to
- * represent distinct spatial partitions of the GPU, provision resources for them, and target them
- * via the same programming model that CUDA exposes (streams, kernel launches, etc.).
- *
- * There are 4 main steps to using this new set of APIs.
- * - (1) Start with an initial set of resources, for example via ::cuDeviceGetDevResource. Only SM type is supported today.
- * - (2) Partition this set of resources by providing them as input to a partition API, for example: ::cuDevSmResourceSplitByCount.
- * - (3) Finalize the specification of resources by creating a descriptor via ::cuDevResourceGenerateDesc.
- * - (4) Provision the resources and create a green context via ::cuGreenCtxCreate.
- *
- * For \p CU_DEV_RESOURCE_TYPE_SM, the partitions created have minimum SM count requirements, often rounding up and aligning the
- * minCount provided to ::cuDevSmResourceSplitByCount. The following is a guideline for each architecture
- * and may be subject to change:
- * - On Compute Architecture 6.X: The minimum count is 1 SM.
- * - On Compute Architecture 7.X: The minimum count is 2 SMs and must be a multiple of 2.
- * - On Compute Architecture 8.X: The minimum count is 4 SMs and must be a multiple of 2.
- * - On Compute Architecture 9.0+: The minimum count is 8 SMs and must be a multiple of 8.
- *
- * In the future, flags can be provided to tradeoff functional and performance characteristics versus finer grained SM partitions.
- *
- * Even if the green contexts have disjoint SM partitions, it is not guaranteed that the kernels launched
- * in them will run concurrently or have forward progress guarantees. This is due to other resources (like HW connections,
- * see ::CUDA_DEVICE_MAX_CONNECTIONS) that could cause a dependency. Additionally, in certain scenarios,
- * it is possible for the workload to run on more SMs than was provisioned (but never less).
- * The following are two scenarios which can exhibit this behavior:
- * - On Volta+ MPS: When \p CUDA_MPS_ACTIVE_THREAD_PERCENTAGE is used,
- * the set of SMs that are used for running kernels can be scaled up to the value of SMs used for the MPS client.
- * - On Compute Architecture 9.x: When a module with dynamic parallelism (CDP) is loaded, all future
- * kernels running under green contexts may use and share an additional set of 2 SMs.
- *
- * @{
- */
- /*!
- * \typedef struct CUdevResourceDesc_st* CUdevResourceDesc;
- * An opaque descriptor handle. The descriptor encapsulates multiple created and configured resources.
- * Created via ::cuDevResourceGenerateDesc
- */
- typedef struct CUdevResourceDesc_st *CUdevResourceDesc;
- typedef enum { /* Flags accepted by the flags argument of ::cuGreenCtxCreate */
- CU_GREEN_CTX_DEFAULT_STREAM = 0x1, /**< Required. Creates a default stream to use inside the green context */
- } CUgreenCtxCreate_flags;
- typedef enum { /* Values for the useFlags argument of ::cuDevSmResourceSplitByCount */
- CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = 0x1, /**< Lower the minimum SM count and alignment, and treat each SM independent of its hierarchy */
- CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = 0x2, /**< Compute Capability 9.0+ only. Attempt to create groups that may allow for maximally sized thread clusters */
- } CUdevSmResourceSplit_flags;
- #define RESOURCE_ABI_VERSION 1 /* Version suffix pasted onto CUdevResource_v to name the struct typedef */
- #define RESOURCE_ABI_EXTERNAL_BYTES 48 /* Byte size of the _oversize member inside the CUdevResource union */
- #define _CONCAT_INNER(x, y) x ## y /* Pastes x and y into a single token */
- #define _CONCAT_OUTER(x, y) _CONCAT_INNER(x, y) /* Extra level so macro arguments are expanded before being pasted */
- /*!
- * \typedef enum CUdevResourceType
- * Type of resource
- */
- typedef enum {
- CU_DEV_RESOURCE_TYPE_INVALID = 0, /**< Marks a resource that is not valid and cannot be further accessed */
- CU_DEV_RESOURCE_TYPE_SM = 1, /**< Streaming multiprocessors related information */
- #if defined(__CUDA_API_VERSION_INTERNAL) && !defined(__CUDA_API_VERSION_INTERNAL_ODR)
- CU_DEV_RESOURCE_TYPE_MAX, /* Declared only when the preprocessor guard above holds */
- #endif
- } CUdevResourceType;
- /*!
- * \struct CUdevSmResource
- * Data for SM-related resources
- */
- typedef struct CUdevSmResource_st {
- unsigned int smCount; /**< The amount of streaming multiprocessors available in this resource. This is an output parameter only, do not write to this field. */
- } CUdevSmResource;
- /*!
- * \struct CUdevResource
- * A tagged union describing different resources identified by the type field. This structure should not be directly modified outside of the API that created it.
- * \code
- * struct {
- * CUdevResourceType type;
- * union {
- * CUdevSmResource sm;
- * };
- * };
- * \endcode
- * - If \p type is \p CU_DEV_RESOURCE_TYPE_INVALID, this resource is not valid and cannot be further accessed.
- * - If \p type is \p CU_DEV_RESOURCE_TYPE_SM, the ::CUdevSmResource structure \p sm is filled in. For example,
- * \p sm.smCount will reflect the amount of streaming multiprocessors available in this resource.
- */
- typedef struct CUdevResource_st {
- CUdevResourceType type; /**< Type of resource, dictates which union field was last set */
- unsigned char _internal_padding[92]; /* Internal padding bytes between the type tag and the union */
- union {
- CUdevSmResource sm; /**< Resource corresponding to \p CU_DEV_RESOURCE_TYPE_SM type. */
- unsigned char _oversize[RESOURCE_ABI_EXTERNAL_BYTES]; /* Keeps the union at least RESOURCE_ABI_EXTERNAL_BYTES bytes large */
- };
- } _CONCAT_OUTER(CUdevResource_v, RESOURCE_ABI_VERSION); /* Struct typedef name is CUdevResource_v followed by the value of RESOURCE_ABI_VERSION */
- typedef _CONCAT_OUTER(CUdevResource_v, RESOURCE_ABI_VERSION) CUdevResource;
- #undef _CONCAT_INNER
- #undef _CONCAT_OUTER
- #undef ABI_PER_RESOURCE_EXTERNAL_BYTES /* NOTE(review): no #define of this name is visible here; RESOURCE_ABI_EXTERNAL_BYTES stays defined - confirm intent */
- #undef ABI_RESOURCE_VERSION /* NOTE(review): no #define of this name is visible here; RESOURCE_ABI_VERSION stays defined - confirm intent */
- /**
- * \brief Creates a green context with a specified set of resources.
- *
- * This API creates a green context with the resources specified in the descriptor \p desc and
- * returns it in the handle represented by \p phCtx. This API will retain the primary context on device \p dev,
- * which is released when the green context is destroyed. It is advised to have the primary context active
- * before calling this API to avoid the heavy cost of triggering primary context initialization and
- * deinitialization multiple times.
- *
- * The API does not set the green context current. In order to set it current, you need to explicitly set it current
- * by first converting the green context to a CUcontext using ::cuCtxFromGreenCtx and subsequently calling
- * ::cuCtxSetCurrent / ::cuCtxPushCurrent. It should be noted that a green context can be current to only one
- * thread at a time. There is no internal synchronization to make API calls accessing the same green context
- * from multiple threads work.
- *
- * Note: The API is not supported on 32-bit platforms.
- *
- * \param phCtx - Pointer for the output handle to the green context
- * \param desc - Descriptor generated via ::cuDevResourceGenerateDesc which contains the set of resources to be used
- * \param dev - Device on which to create the green context.
- * \param flags - One of the supported green context creation flags. \p CU_GREEN_CTX_DEFAULT_STREAM is required.
- *
- * The supported flags are:
- * - \p CU_GREEN_CTX_DEFAULT_STREAM : Creates a default stream to use inside the green context. Required.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- *
- * \sa
- * ::cuGreenCtxDestroy,
- * ::cuCtxFromGreenCtx,
- * ::cuCtxSetCurrent,
- * ::cuCtxPushCurrent,
- * ::cuDevResourceGenerateDesc,
- * ::cuDevicePrimaryCtxRetain,
- * ::cuCtxCreate,
- * ::cuCtxCreate_v3
- */
- CUresult CUDAAPI cuGreenCtxCreate(CUgreenCtx* phCtx, CUdevResourceDesc desc, CUdevice dev, unsigned int flags);
- /**
- * \brief Destroys a green context
- *
- * Destroys the green context, releasing the primary context of the device that this green context was created for.
- * Any resources provisioned for this green context (that were initially available via the resource descriptor)
- * are released as well.
- * \param hCtx - Green context to be destroyed
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_CONTEXT_IS_DESTROYED
- *
- * \sa
- * ::cuGreenCtxCreate,
- * ::cuCtxDestroy
- */
- CUresult CUDAAPI cuGreenCtxDestroy(CUgreenCtx hCtx);
- /**
- * \brief Converts a green context into the primary context
- *
- * The API converts a green context into the primary context returned in \p pContext. It is important
- * to note that the converted context \p pContext is a normal primary context but with
- * the resources of the specified green context \p hCtx. Once converted, it can then
- * be used to set the context current with ::cuCtxSetCurrent or with any of the CUDA APIs
- * that accept a CUcontext parameter.
- *
- * Users are expected to call this API before calling any CUDA APIs that accept a
- * CUcontext. Failing to do so will result in the APIs returning ::CUDA_ERROR_INVALID_CONTEXT.
- *
- * \param pContext Returned primary context with green context resources
- * \param hCtx Green context to convert
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuGreenCtxCreate
- */
- CUresult CUDAAPI cuCtxFromGreenCtx(CUcontext *pContext, CUgreenCtx hCtx);
- /**
- * \brief Get device resources
- *
- * Get the \p type resources available to the \p device.
- * This may often be the starting point for further partitioning or configuring of resources.
- *
- * Note: The API is not supported on 32-bit platforms.
- *
- * \param device - Device to get resource for
- * \param resource - Output pointer to a CUdevResource structure
- * \param type - Type of resource to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_RESOURCE_TYPE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_DEVICE
- *
- * \sa
- * ::cuDevResourceGenerateDesc
- */
- CUresult CUDAAPI cuDeviceGetDevResource(CUdevice device, CUdevResource* resource, CUdevResourceType type);
- /**
- * \brief Get context resources
- *
- * Get the \p type resources available to the context represented by \p hCtx.
- *
- * Note: The API is not supported on 32-bit platforms.
- *
- * \param hCtx - Context to get resource for
- * \param resource - Output pointer to a CUdevResource structure
- * \param type - Type of resource to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_RESOURCE_TYPE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_CONTEXT
- *
- * \sa
- * ::cuDevResourceGenerateDesc
- */
- CUresult CUDAAPI cuCtxGetDevResource(CUcontext hCtx, CUdevResource* resource, CUdevResourceType type);
- /**
- * \brief Get green context resources
- *
- * Get the \p type resources available to the green context represented by \p hCtx
- * \param hCtx - Green context to get resource for
- * \param resource - Output pointer to a CUdevResource structure
- * \param type - Type of resource to retrieve
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_RESOURCE_TYPE,
- * ::CUDA_ERROR_INVALID_VALUE
- *
- * \sa
- * ::cuDevResourceGenerateDesc
- */
- CUresult CUDAAPI cuGreenCtxGetDevResource(CUgreenCtx hCtx, CUdevResource* resource, CUdevResourceType type);
- /**
- * \brief Splits \p CU_DEV_RESOURCE_TYPE_SM resources.
- *
- * Splits \p CU_DEV_RESOURCE_TYPE_SM resources into \p nbGroups, adhering to the minimum SM count specified in \p minCount
- * and the usage flags in \p useFlags. If \p result is NULL, the API simulates a split and provides the amount of groups that
- * would be created in \p nbGroups. Otherwise, \p nbGroups must point to the amount of elements in \p result and on return,
- * the API will overwrite \p nbGroups with the amount actually created. The groups are written to the array in \p result.
- * \p nbGroups can be less than the total amount if a smaller number of groups is needed.
- *
- * This API is used to spatially partition the input resource. The input resource needs to come from one of
- * ::cuDeviceGetDevResource, ::cuCtxGetDevResource, or ::cuGreenCtxGetDevResource.
- * A limitation of the API is that the output results cannot be split again without
- * first creating a descriptor and a green context with that descriptor.
- *
- * When creating the groups, the API will take into account the performance and functional characteristics of the
- * input resource, and guarantee a split that will create a disjoint set of symmetrical partitions. This may lead to fewer groups created
- * than purely dividing the total SM count by the \p minCount due to cluster requirements or
- * alignment and granularity requirements for the minCount.
- *
- * The \p remaining set does not have the same functional or performance guarantees as the groups in \p result.
- * Its use should be carefully planned and future partitions of the \p remaining set are discouraged.
- *
- * The following flags are supported:
- * - \p CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING : Lower the minimum SM count and alignment, and treat each SM independent of its hierarchy.
- * This allows more fine grained partitions but at the cost of advanced features (such as large clusters on compute capability 9.0+).
- * - \p CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE : Compute Capability 9.0+ only. Attempt to create groups that may allow
- * for maximally sized thread clusters. This can be queried post green context creation using ::cuOccupancyMaxPotentialClusterSize.
- *
- * A successful API call must either have:
- * - A valid array of \p result pointers of size passed in \p nbGroups, with \p input of type \p CU_DEV_RESOURCE_TYPE_SM.
- * Value of \p minCount must be between 0 and the SM count specified in \p input. \p remaining may be NULL.
- * - NULL passed in for \p result, with a valid integer pointer in \p nbGroups and \p input of type \p CU_DEV_RESOURCE_TYPE_SM.
- * Value of \p minCount must be between 0 and the SM count specified in \p input. \p remaining may be NULL.
- * This queries the number of groups that would be created by the API.
- *
- * Note: The API is not supported on 32-bit platforms.
- *
- * \param result - Output array of \p CUdevResource resources. Can be NULL to query the number of groups.
- * \param nbGroups - This is a pointer, specifying the number of groups that would be or should be created as described below.
- * \param input - Input SM resource to be split. Must be a valid \p CU_DEV_RESOURCE_TYPE_SM resource.
- * \param remaining - If the input resource cannot be cleanly split among \p nbGroups, the remainder is placed here.
- * Can be omitted (NULL) if the user does not need the remaining set.
- * \param useFlags - Flags specifying how these partitions are used or which constraints to abide by when splitting the input. Zero is valid for default behavior.
- * \param minCount - Minimum number of SMs required
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_DEVICE,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_RESOURCE_TYPE,
- * ::CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION
- *
- * \sa
- * ::cuGreenCtxGetDevResource,
- * ::cuCtxGetDevResource,
- * ::cuDeviceGetDevResource
- */
- CUresult CUDAAPI cuDevSmResourceSplitByCount(
- CUdevResource* result, unsigned int* nbGroups, const CUdevResource* input, CUdevResource* remaining, unsigned int useFlags, unsigned int minCount);
- /**
- * \brief Generate a resource descriptor
- *
- * Generates a single resource descriptor with the set of resources specified in \p resources.
- * The generated resource descriptor is necessary for the creation of green contexts via the ::cuGreenCtxCreate API.
- * Resources of the same type can be passed in, provided they meet the requirements as noted below.
- *
- * A successful API call must have:
- * - A valid output pointer for the \p phDesc descriptor as well as a valid array of \p resources pointers,
- * with the array size passed in \p nbResources.
- * If multiple resources are provided in \p resources, the device they came from must be the same,
- * otherwise CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned.
- * If multiple resources are provided in \p resources and they are of type ::CU_DEV_RESOURCE_TYPE_SM,
- * they must be outputs (whether \p result or \p remaining) from the same split API instance,
- * otherwise CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned.
- *
- * Note: The API is not supported on 32-bit platforms.
- *
- * \param phDesc - Output descriptor
- * \param resources - Array of resources to be included in the descriptor
- * \param nbResources - Number of resources passed in \p resources
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_INVALID_RESOURCE_TYPE,
- * ::CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION
- *
- * \sa
- * ::cuDevSmResourceSplitByCount
- */
- CUresult CUDAAPI cuDevResourceGenerateDesc(CUdevResourceDesc *phDesc, CUdevResource *resources, unsigned int nbResources);
- /**
- * \brief Records an event.
- *
- * Captures in \p hEvent all the activities of the green context of \p hCtx
- * at the time of this call. \p hEvent and \p hCtx must be from the same
- * primary context otherwise ::CUDA_ERROR_INVALID_HANDLE is returned.
- * Calls such as ::cuEventQuery() or ::cuGreenCtxWaitEvent() will
- * then examine or wait for completion of the work that was captured. Uses of
- * \p hCtx after this call do not modify \p hEvent.
- *
- * \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED if the
- * specified green context \p hCtx has a stream in the capture mode. In such
- * a case, the call will invalidate all the conflicting captures.
- *
- * \param hCtx - Green context to record event for
- * \param hEvent - Event to record
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
- *
- * \sa
- * ::cuGreenCtxWaitEvent,
- * ::cuEventRecord,
- * ::cuCtxRecordEvent,
- * ::cuCtxWaitEvent
- */
- CUresult CUDAAPI cuGreenCtxRecordEvent(CUgreenCtx hCtx, CUevent hEvent);
- /**
- * \brief Make a green context wait on an event
- *
- * Makes all future work submitted to green context \p hCtx wait for all work
- * captured in \p hEvent. The synchronization will be performed on the device
- * and will not block the calling CPU thread. See ::cuGreenCtxRecordEvent()
- * or ::cuEventRecord(), for details on what is captured by an event.
- *
- * \note \p hEvent may be from a different context or device than \p hCtx.
- *
- * \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED and
- * invalidate the capture if the specified event \p hEvent is part of an
- * ongoing capture sequence or if the specified green context \p hCtx has
- * a stream in the capture mode.
- *
- * \param hCtx - Green context to wait
- * \param hEvent - Event to wait on
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE,
- * ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
- *
- * \sa
- * ::cuGreenCtxRecordEvent,
- * ::cuStreamWaitEvent,
- * ::cuCtxRecordEvent,
- * ::cuCtxWaitEvent
- */
- CUresult CUDAAPI cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent);
- /**
- * \brief Query the green context associated with a stream
- *
- * Returns the CUDA green context that the stream is associated with, or NULL if the stream
- * is not associated with any green context.
- *
- * The stream handle \p hStream can refer to any of the following:
- * <ul>
- * <li>
- * a stream created via any of the CUDA driver APIs such as ::cuStreamCreate, ::cuStreamCreateWithPriority
- * and ::cuGreenCtxStreamCreate, or their runtime API equivalents such as
- * ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority.
- * If during stream creation the context that was active in the calling thread was obtained
- * with cuCtxFromGreenCtx, that green context is returned in \p phCtx.
- * Otherwise, \p *phCtx is set to NULL instead.
- * </li>
- * <li>
- * a special stream such as the NULL stream or ::CU_STREAM_LEGACY.
- * In that case, if the context that is active in the calling thread was obtained
- * with cuCtxFromGreenCtx, that green context is returned.
- * Otherwise, \p *phCtx is set to NULL instead.
- * </li>
- * </ul>
- * Passing an invalid handle will result in undefined behavior.
- *
- * \param hStream - Handle to the stream to be queried
- * \param phCtx - Returned green context associated with the stream
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_HANDLE
- * \notefnerr
- *
- * \sa ::cuStreamDestroy,
- * ::cuStreamCreate,
- * ::cuStreamCreateWithPriority,
- * ::cuStreamGetCtx_v2,
- * ::cuGreenCtxStreamCreate,
- * ::cuStreamGetPriority,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreate,
- * ::cudaStreamCreateWithFlags
- */
- CUresult CUDAAPI cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx *phCtx);
- /**
- * \brief Create a stream for use in the green context
- *
- * Creates a stream for use in the specified green context \p greenCtx and returns a handle in \p phStream.
- * The stream can be destroyed by calling ::cuStreamDestroy(). Note that the API ignores the context that
- * is current to the calling thread and creates a stream in the specified green context \p greenCtx.
- *
- * The supported values for \p flags are:
- * - ::CU_STREAM_NON_BLOCKING: This must be specified. It indicates that work running in the created
- * stream may run concurrently with work in the default stream, and that
- * the created stream should perform no implicit synchronization with the default stream.
- *
- * Specifying \p priority affects the scheduling priority of work in the stream. Priorities provide a
- * hint to preferentially run work with higher priority when possible, but do not preempt
- * already-running work or provide any other functional guarantee on execution order.
- * \p priority follows a convention where lower numbers represent higher priorities.
- * '0' represents default priority. The range of meaningful numerical priorities can
- * be queried using ::cuCtxGetStreamPriorityRange. If the specified priority is
- * outside the numerical range returned by ::cuCtxGetStreamPriorityRange,
- * it will automatically be clamped to the lowest or the highest number in the range.
- *
- * \param phStream - Returned newly created stream
- * \param greenCtx - Green context for which to create the stream
- * \param flags - Flags for stream creation. \p CU_STREAM_NON_BLOCKING must be specified.
- * \param priority - Stream priority. Lower numbers represent higher priorities.
- * See ::cuCtxGetStreamPriorityRange for more information about
- * meaningful stream priorities that can be passed.
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_DEINITIALIZED,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_INVALID_CONTEXT,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_OUT_OF_MEMORY
- * \notefnerr
- *
- * \note In the current implementation, only compute kernels launched in
- * priority streams are affected by the stream's priority. Stream priorities have
- * no effect on host-to-device and device-to-host memory operations.
- *
- * \sa ::cuStreamDestroy,
- * ::cuGreenCtxCreate,
- * ::cuStreamCreate,
- * ::cuStreamGetPriority,
- * ::cuCtxGetStreamPriorityRange,
- * ::cuStreamGetFlags,
- * ::cuStreamGetDevice,
- * ::cuStreamWaitEvent,
- * ::cuStreamQuery,
- * ::cuStreamSynchronize,
- * ::cuStreamAddCallback,
- * ::cudaStreamCreateWithPriority
- */
- CUresult CUDAAPI cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority);
- /** @} */
- /*
- ** *************** END CUDA_GREEN_CONTEXTS *****************
- */
- /**
- * CUDA API versioning support
- */
- #if defined(__CUDA_API_VERSION_INTERNAL)
- #undef cuMemHostRegister
- #undef cuGraphicsResourceSetMapFlags
- #undef cuLinkCreate
- #undef cuLinkAddData
- #undef cuLinkAddFile
- #undef cuDeviceTotalMem
- #undef cuCtxCreate
- #undef cuModuleGetGlobal
- #undef cuMemGetInfo
- #undef cuMemAlloc
- #undef cuMemAllocPitch
- #undef cuMemFree
- #undef cuMemGetAddressRange
- #undef cuMemAllocHost
- #undef cuMemHostGetDevicePointer
- #undef cuMemcpyHtoD
- #undef cuMemcpyDtoH
- #undef cuMemcpyDtoD
- #undef cuMemcpyDtoA
- #undef cuMemcpyAtoD
- #undef cuMemcpyHtoA
- #undef cuMemcpyAtoH
- #undef cuMemcpyAtoA
- #undef cuMemcpyHtoAAsync
- #undef cuMemcpyAtoHAsync
- #undef cuMemcpy2D
- #undef cuMemcpy2DUnaligned
- #undef cuMemcpy3D
- #undef cuMemcpyHtoDAsync
- #undef cuMemcpyDtoHAsync
- #undef cuMemcpyDtoDAsync
- #undef cuMemcpy2DAsync
- #undef cuMemcpy3DAsync
- #undef cuMemcpyBatchAsync
- #undef cuMemcpy3DBatchAsync
- #undef cuMemsetD8
- #undef cuMemsetD16
- #undef cuMemsetD32
- #undef cuMemsetD2D8
- #undef cuMemsetD2D16
- #undef cuMemsetD2D32
- #undef cuArrayCreate
- #undef cuArrayGetDescriptor
- #undef cuArray3DCreate
- #undef cuArray3DGetDescriptor
- #undef cuTexRefSetAddress
- #undef cuTexRefSetAddress2D
- #undef cuTexRefGetAddress
- #undef cuGraphicsResourceGetMappedPointer
- #undef cuCtxDestroy
- #undef cuCtxPopCurrent
- #undef cuCtxPushCurrent
- #undef cuStreamDestroy
- #undef cuEventDestroy
- #undef cuMemcpy
- #undef cuMemcpyAsync
- #undef cuMemcpyPeer
- #undef cuMemcpyPeerAsync
- #undef cuMemcpy3DPeer
- #undef cuMemcpy3DPeerAsync
- #undef cuMemsetD8Async
- #undef cuMemsetD16Async
- #undef cuMemsetD32Async
- #undef cuMemsetD2D8Async
- #undef cuMemsetD2D16Async
- #undef cuMemsetD2D32Async
- #undef cuStreamGetPriority
- #undef cuStreamGetId
- #undef cuStreamGetFlags
- #undef cuStreamGetDevice
- #undef cuStreamGetCtx
- #undef cuStreamWaitEvent
- #undef cuStreamAddCallback
- #undef cuStreamAttachMemAsync
- #undef cuStreamQuery
- #undef cuStreamSynchronize
- #undef cuEventRecord
- #undef cuEventRecordWithFlags
- #undef cuLaunchKernel
- #undef cuLaunchKernelEx
- #undef cuLaunchHostFunc
- #undef cuGraphicsMapResources
- #undef cuGraphicsUnmapResources
- #undef cuStreamWriteValue32
- #undef cuStreamWaitValue32
- #undef cuStreamWriteValue64
- #undef cuStreamWaitValue64
- #undef cuStreamBatchMemOp
- #undef cuStreamWriteValue32_v2
- #undef cuStreamWaitValue32_v2
- #undef cuStreamWriteValue64_v2
- #undef cuStreamWaitValue64_v2
- #undef cuStreamBatchMemOp_v2
- #undef cuMemPrefetchAsync
- #undef cuMemPrefetchAsync_v2
- #undef cuLaunchCooperativeKernel
- #undef cuSignalExternalSemaphoresAsync
- #undef cuWaitExternalSemaphoresAsync
- #undef cuStreamBeginCapture
- #undef cuStreamBeginCaptureToGraph
- #undef cuStreamEndCapture
- #undef cuStreamIsCapturing
- #undef cuStreamGetCaptureInfo
- #undef cuStreamGetCaptureInfo_v2
- #undef cuStreamGetCaptureInfo_v3
- #undef cuGraphInstantiateWithParams
- #undef cuGraphExecUpdate
- #undef cuGraphUpload
- #undef cuGraphLaunch
- #undef cuDevicePrimaryCtxRelease
- #undef cuDevicePrimaryCtxReset
- #undef cuDevicePrimaryCtxSetFlags
- #undef cuIpcOpenMemHandle
- #undef cuStreamCopyAttributes
- #undef cuStreamSetAttribute
- #undef cuStreamGetAttribute
- #undef cuGraphInstantiate
- #undef cuGraphAddKernelNode
- #undef cuGraphKernelNodeGetParams
- #undef cuGraphKernelNodeSetParams
- #undef cuGraphExecKernelNodeSetParams
- #undef cuMemMapArrayAsync
- #undef cuMemFreeAsync
- #undef cuMemAllocAsync
- #undef cuMemAllocFromPoolAsync
- #undef cuStreamUpdateCaptureDependencies
- #undef cuStreamUpdateCaptureDependencies_v2
- #undef cuGetProcAddress
- #undef cuStreamGetCtx_v2
- #undef cuMemBatchDecompressAsync
- CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, unsigned int Flags);
- CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
- CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
- CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name,
- unsigned int numOptions, CUjit_option *options, void **optionValues);
- CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, const char *path,
- unsigned int numOptions, CUjit_option *options, void **optionValues);
- CUresult CUDAAPI cuTexRefSetAddress2D_v2(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
- typedef unsigned int CUdeviceptr_v1; /**< Version-1 device pointer type, declared as an unsigned int */
- typedef struct CUDA_MEMCPY2D_v1_st /* Version-1 2D memory copy descriptor taken by the cuMemcpy2D, cuMemcpy2DUnaligned and cuMemcpy2DAsync prototypes declared in this __CUDA_API_VERSION_INTERNAL section; offsets, pitches and extents are unsigned int and device pointers are CUdeviceptr_v1. */
- {
- unsigned int srcXInBytes; /**< Source X in bytes */
- unsigned int srcY; /**< Source Y */
- CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
- const void *srcHost; /**< Source host pointer */
- CUdeviceptr_v1 srcDevice; /**< Source device pointer */
- CUarray srcArray; /**< Source array reference */
- unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
- unsigned int dstXInBytes; /**< Destination X in bytes */
- unsigned int dstY; /**< Destination Y */
- CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
- void *dstHost; /**< Destination host pointer */
- CUdeviceptr_v1 dstDevice; /**< Destination device pointer */
- CUarray dstArray; /**< Destination array reference */
- unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
- unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */
- unsigned int Height; /**< Height of 2D memory copy */
- } CUDA_MEMCPY2D_v1;
- typedef struct CUDA_MEMCPY3D_v1_st /* Version-1 3D memory copy descriptor taken by the cuMemcpy3D and cuMemcpy3DAsync prototypes declared in this __CUDA_API_VERSION_INTERNAL section; offsets, pitches and extents are unsigned int and device pointers are CUdeviceptr_v1. */
- {
- unsigned int srcXInBytes; /**< Source X in bytes */
- unsigned int srcY; /**< Source Y */
- unsigned int srcZ; /**< Source Z */
- unsigned int srcLOD; /**< Source LOD */
- CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
- const void *srcHost; /**< Source host pointer */
- CUdeviceptr_v1 srcDevice; /**< Source device pointer */
- CUarray srcArray; /**< Source array reference */
- void *reserved0; /**< Must be NULL */
- unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
- unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */
- unsigned int dstXInBytes; /**< Destination X in bytes */
- unsigned int dstY; /**< Destination Y */
- unsigned int dstZ; /**< Destination Z */
- unsigned int dstLOD; /**< Destination LOD */
- CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
- void *dstHost; /**< Destination host pointer */
- CUdeviceptr_v1 dstDevice; /**< Destination device pointer */
- CUarray dstArray; /**< Destination array reference */
- void *reserved1; /**< Must be NULL */
- unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
- unsigned int dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
- unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */
- unsigned int Height; /**< Height of 3D memory copy */
- unsigned int Depth; /**< Depth of 3D memory copy */
- } CUDA_MEMCPY3D_v1;
- typedef struct CUDA_ARRAY_DESCRIPTOR_v1_st /* Version-1 array descriptor taken by the cuArrayCreate, cuArrayGetDescriptor and cuTexRefSetAddress2D prototypes declared in this __CUDA_API_VERSION_INTERNAL section; Width and Height are unsigned int. */
- {
- unsigned int Width; /**< Width of array */
- unsigned int Height; /**< Height of array */
- CUarray_format Format; /**< Array format */
- unsigned int NumChannels; /**< Channels per array element */
- } CUDA_ARRAY_DESCRIPTOR_v1;
- typedef struct CUDA_ARRAY3D_DESCRIPTOR_v1_st /* Version-1 3D array descriptor taken by the cuArray3DCreate and cuArray3DGetDescriptor prototypes declared in this __CUDA_API_VERSION_INTERNAL section; Width, Height and Depth are unsigned int. */
- {
- unsigned int Width; /**< Width of 3D array */
- unsigned int Height; /**< Height of 3D array */
- unsigned int Depth; /**< Depth of 3D array */
- CUarray_format Format; /**< Array format */
- unsigned int NumChannels; /**< Channels per array element */
- unsigned int Flags; /**< Flags */
- } CUDA_ARRAY3D_DESCRIPTOR_v1;
- CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
- CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
- CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr_v1 *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
- CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);
- CUresult CUDAAPI cuMemAlloc(CUdeviceptr_v1 *dptr, unsigned int bytesize);
- CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr_v1 *dptr, unsigned int *pPitch, unsigned int WidthInBytes, unsigned int Height, unsigned int ElementSizeBytes);
- CUresult CUDAAPI cuMemFree(CUdeviceptr_v1 dptr);
- CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr_v1 *pbase, unsigned int *psize, CUdeviceptr_v1 dptr);
- CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
- CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr_v1 *pdptr, void *p, unsigned int Flags);
- CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr_v1 dstDevice, const void *srcHost, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr_v1 srcDevice, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr_v1 dstDevice, CUdeviceptr_v1 srcDevice, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr_v1 srcDevice, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr_v1 dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
- CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D_v1 *pCopy);
- CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D_v1 *pCopy);
- CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D_v1 *pCopy);
- CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr_v1 dstDevice, const void *srcHost, unsigned int ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr_v1 srcDevice, unsigned int ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr_v1 dstDevice, CUdeviceptr_v1 srcDevice, unsigned int ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D_v1 *pCopy, CUstream hStream);
- CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D_v1 *pCopy, CUstream hStream);
- CUresult CUDAAPI cuMemsetD8(CUdeviceptr_v1 dstDevice, unsigned char uc, unsigned int N);
- CUresult CUDAAPI cuMemsetD16(CUdeviceptr_v1 dstDevice, unsigned short us, unsigned int N);
- CUresult CUDAAPI cuMemsetD32(CUdeviceptr_v1 dstDevice, unsigned int ui, unsigned int N);
- CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr_v1 dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height);
- CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr_v1 dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height);
- CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr_v1 dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height);
- CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR_v1 *pAllocateArray);
- CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR_v1 *pArrayDescriptor, CUarray hArray);
- CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR_v1 *pAllocateArray);
- CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR_v1 *pArrayDescriptor, CUarray hArray);
- CUresult CUDAAPI cuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr_v1 dptr, unsigned int bytes);
- CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR_v1 *desc, CUdeviceptr_v1 dptr, unsigned int Pitch);
- CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr_v1 *pdptr, CUtexref hTexRef);
- CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr_v1 *pDevPtr, unsigned int *pSize, CUgraphicsResource resource);
- CUresult CUDAAPI cuCtxDestroy(CUcontext ctx);
- CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx);
- CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx);
- CUresult CUDAAPI cuStreamDestroy(CUstream hStream);
- CUresult CUDAAPI cuEventDestroy(CUevent hEvent);
- CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev);
- CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev);
- CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
- CUresult CUDAAPI cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyAtoH_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyAtoHAsync_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpy2D_v2(const CUDA_MEMCPY2D *pCopy);
- CUresult CUDAAPI cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D *pCopy);
- CUresult CUDAAPI cuMemcpy3D_v2(const CUDA_MEMCPY3D *pCopy);
- CUresult CUDAAPI cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
- CUresult CUDAAPI cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
- CUresult CUDAAPI cuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
- CUresult CUDAAPI cuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
- CUresult CUDAAPI cuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
- CUresult CUDAAPI cuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
- CUresult CUDAAPI cuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
- CUresult CUDAAPI cuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
- CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
- CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
- CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy);
- CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream);
- CUresult CUDAAPI cuMemcpyBatchAsync(CUdeviceptr *dsts, CUdeviceptr *srcs, size_t *sizes, size_t count,
- CUmemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
- size_t *failIdx, CUstream hStream);
- CUresult CUDAAPI cuMemcpy3DBatchAsync(size_t numOps, CUDA_MEMCPY3D_BATCH_OP *opList,
- size_t *failIdx, unsigned long long flags, CUstream hStream);
- CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
- CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
- CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
- CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
- CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
- CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
- CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority);
- CUresult CUDAAPI cuStreamGetId(CUstream hStream, unsigned long long *streamId);
- CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags);
- CUresult CUDAAPI cuStreamGetDevice(CUstream hStream, CUdevice *device);
- CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
- CUresult CUDAAPI cuStreamGetCtx_v2(CUstream hStream, CUcontext *pCtx, CUgreenCtx *pGreenCtx);
- CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
- CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void *userData, unsigned int flags);
- CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
- CUresult CUDAAPI cuStreamQuery(CUstream hStream);
- CUresult CUDAAPI cuStreamSynchronize(CUstream hStream);
- CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream);
- CUresult CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
- CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
- CUresult CUDAAPI cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, void **kernelParams, void **extra);
- CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void *userData);
- CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
- CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
- CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
- CUresult CUDAAPI cuStreamWriteValue32_ptsz(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue32_ptsz(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWriteValue64_ptsz(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue64_ptsz(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamBatchMemOp_ptsz(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
- CUresult CUDAAPI cuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
- CUresult CUDAAPI cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
- CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
- CUresult CUDAAPI cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream);
- CUresult CUDAAPI cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams);
- CUresult CUDAAPI cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
- CUresult CUDAAPI cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
- CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream);
- CUresult CUDAAPI cuStreamBeginCapture_ptsz(CUstream hStream);
- CUresult CUDAAPI cuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode);
- CUresult CUDAAPI cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode *dependencies, const CUgraphEdgeData *dependencyData, size_t numDependencies, CUstreamCaptureMode mode);
- CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph);
- CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus *captureStatus);
- CUresult CUDAAPI cuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out);
- CUresult CUDAAPI cuStreamGetCaptureInfo_ptsz(CUstream hStream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out);
- CUresult CUDAAPI cuStreamGetCaptureInfo_v2(CUstream hStream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
- CUresult CUDAAPI cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, const CUgraphEdgeData **edgeData_out, size_t *numDependencies_out);
- CUresult CUDAAPI cuGraphAddKernelNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS_v1 *nodeParams);
- CUresult CUDAAPI cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS_v1 *nodeParams);
- CUresult CUDAAPI cuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS_v1 *nodeParams);
- CUresult CUDAAPI cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS_v1 *nodeParams);
- CUresult CUDAAPI cuGraphInstantiateWithParams(CUgraphExec *phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS *instantiateParams);
- CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode *hErrorNode_out, CUgraphExecUpdateResult *updateResult_out);
- CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraph, CUstream hStream);
- CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraph, CUstream hStream);
- CUresult CUDAAPI cuStreamCopyAttributes(CUstream dstStream, CUstream srcStream);
- CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue *value);
- CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue *param);
- CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags);
- CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize);
- CUresult CUDAAPI cuGraphInstantiate_v2(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize);
- CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, unsigned int count, CUstream hStream);
- CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
- CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream);
- CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
- CUresult CUDAAPI cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
- CUresult CUDAAPI cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode *dependencies, const CUgraphEdgeData *dependencyData, size_t numDependencies, unsigned int flags);
- CUresult CUDAAPI cuMemBatchDecompressAsync(
- CUmemDecompressParams *paramsArray,
- size_t count,
- unsigned int flags,
- size_t *errorIndex,
- CUstream stream
- );
- CUresult CUDAAPI cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags);
- #elif defined(__CUDA_API_PER_THREAD_DEFAULT_STREAM)
- /**
- * Per-thread-default-stream wrapper around ::cuGetProcAddress_v2.
- *
- * When \p flags selects neither ::CU_GET_PROC_ADDRESS_LEGACY_STREAM nor
- * ::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, the per-thread default
- * stream flag is added before the request is forwarded; otherwise \p flags
- * is passed through unchanged. All remaining arguments and the result of
- * ::cuGetProcAddress_v2 are forwarded as-is.
- */
- static inline CUresult cuGetProcAddress_v2_ptsz(const char *symbol, void **funcPtr, int driverVersion, cuuint64_t flags, CUdriverProcAddressQueryResult *symbolStatus) {
- const int streamModeBits = (CU_GET_PROC_ADDRESS_LEGACY_STREAM | CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM);
- const cuuint64_t forwardedFlags = (flags & streamModeBits) ? flags : (flags | CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM);
- return cuGetProcAddress_v2(symbol, funcPtr, driverVersion, forwardedFlags, symbolStatus);
- }
- #define cuGetProcAddress_v2 cuGetProcAddress_v2_ptsz
- #endif
- /**
- * \defgroup CUDA_CHECKPOINT CUDA Checkpointing
- *
- * ___MANBRIEF___ CUDA checkpoint and restore functionality of the low-level
- * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
- *
- * This section describes the checkpoint and restore functions of the low-level
- * CUDA driver application programming interface.
- *
- * The CUDA checkpoint and restore APIs provide a way to save and restore GPU
- * state for full process checkpoints when used with CPU side process
- * checkpointing solutions. They can also be used to pause GPU work and suspend
- * a CUDA process to allow other applications to make use of GPU resources.
- *
- * Checkpoint and restore capabilities are currently restricted to Linux.
- *
- * @{
- */
- /**
- * \brief Returns the restore thread ID for a CUDA process
- *
- * Returns in \p *tid the thread ID of the CUDA restore thread for the process
- * specified by \p pid.
- *
- * \param pid - The process ID of the CUDA process
- * \param tid - Returned restore thread ID
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuCheckpointProcessGetState
- */
- CUresult CUDAAPI cuCheckpointProcessGetRestoreThreadId(int pid, int *tid);
- /**
- * \brief Returns the process state of a CUDA process
- *
- * Returns in \p *state the current state of the CUDA process specified by \p pid.
- *
- * \param pid - The process ID of the CUDA process
- * \param state - Returned CUDA process state
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuCheckpointProcessLock,
- * ::cuCheckpointProcessCheckpoint,
- * ::cuCheckpointProcessRestore,
- * ::cuCheckpointProcessUnlock
- */
- CUresult CUDAAPI cuCheckpointProcessGetState(int pid, CUprocessState *state);
- /**
- * \brief Lock a running CUDA process
- *
- * Lock the CUDA process specified by \p pid which will block further CUDA API
- * calls. Process must be in the RUNNING state in order to lock.
- *
- * Upon successful return the process will be in the LOCKED state.
- *
- * If timeoutMs is specified and the timeout is reached the process will be left
- * in the RUNNING state upon return.
- *
- * \param pid - The process ID of the CUDA process
- * \param args - Optional lock operation arguments
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_ILLEGAL_STATE,
- * ::CUDA_ERROR_NOT_SUPPORTED,
- * ::CUDA_ERROR_NOT_READY
- *
- * \sa
- * ::cuCheckpointProcessUnlock,
- * ::cuCheckpointProcessCheckpoint,
- * ::cuCheckpointProcessGetState
- */
- CUresult CUDAAPI cuCheckpointProcessLock(int pid, CUcheckpointLockArgs *args);
- /**
- * \brief Checkpoint a CUDA process's GPU memory contents
- *
- * Checkpoints a CUDA process specified by \p pid that is in the LOCKED
- * state. The GPU memory contents will be brought into host memory and all
- * underlying references will be released. Process must be in the LOCKED state
- * to checkpoint.
- *
- * Upon successful return the process will be in the CHECKPOINTED state.
- *
- * \param pid - The process ID of the CUDA process
- * \param args - Optional checkpoint operation arguments
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_ILLEGAL_STATE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuCheckpointProcessLock,
- * ::cuCheckpointProcessRestore,
- * ::cuCheckpointProcessGetState
- */
- CUresult CUDAAPI cuCheckpointProcessCheckpoint(int pid, CUcheckpointCheckpointArgs *args);
- /**
- * \brief Restore a CUDA process's GPU memory contents from its last checkpoint
- *
- * Restores a CUDA process specified by \p pid from its last checkpoint. Process
- * must be in the CHECKPOINTED state to restore.
- *
- * Upon successful return the process will be in the LOCKED state.
- *
- * CUDA process restore requires persistence mode to be enabled or ::cuInit to
- * have been called before execution.
- *
- * \param pid - The process ID of the CUDA process
- * \param args - Optional restore operation arguments
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_ILLEGAL_STATE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuInit,
- * ::cuCheckpointProcessCheckpoint,
- * ::cuCheckpointProcessUnlock
- */
- CUresult CUDAAPI cuCheckpointProcessRestore(int pid, CUcheckpointRestoreArgs *args);
- /**
- * \brief Unlock a CUDA process to allow CUDA API calls
- *
- * Unlocks a process specified by \p pid allowing it to resume making CUDA API
- * calls. Process must be in the LOCKED state.
- *
- * Upon successful return the process will be in the RUNNING state.
- *
- * \param pid - The process ID of the CUDA process
- * \param args - Optional unlock operation arguments
- *
- * \return
- * ::CUDA_SUCCESS,
- * ::CUDA_ERROR_INVALID_VALUE,
- * ::CUDA_ERROR_NOT_INITIALIZED,
- * ::CUDA_ERROR_ILLEGAL_STATE,
- * ::CUDA_ERROR_NOT_SUPPORTED
- *
- * \sa
- * ::cuCheckpointProcessLock,
- * ::cuCheckpointProcessRestore
- */
- CUresult CUDAAPI cuCheckpointProcessUnlock(int pid, CUcheckpointUnlockArgs *args);
- /** @} */ /* End CUDA_CHECKPOINT */
- #ifdef __cplusplus
- }
- #endif
- #if defined(__GNUC__)
- #if defined(__CUDA_API_PUSH_VISIBILITY_DEFAULT)
- #pragma GCC visibility pop
- #endif
- #endif
- #undef __CUDA_DEPRECATED
- #endif /* __cuda_cuda_h__ */
|