| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
11342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750137511375213753137541375513756137571375813759137601376113762137631376413765137661376713768137691377013771137721377313774137751377613777137781377913780137811378213783137841378513786137871378813789137901379113792137931379413795137961379713798137991380013801138021380313804138051380613807138081380913810138111381213813138141381513816138171381813819138201382
113822138231382413825138261382713828138291383013831138321383313834138351383613837138381383913840138411384213843138441384513846138471384813849138501385113852138531385413855138561385713858138591386013861138621386313864138651386613867138681386913870138711387213873138741387513876138771387813879138801388113882138831388413885138861388713888138891389013891138921389313894138951389613897138981389913900139011390213903139041390513906139071390813909139101391113912139131391413915139161391713918139191392013921139221392313924139251392613927139281392913930139311393213933139341393513936139371393813939139401394113942139431394413945 |
- # Copyright 2019-present, the HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from __future__ import annotations
- import base64
- import inspect
- import itertools
- import json
- import re
- import struct
- import time
- import warnings
- from collections import defaultdict
- from collections.abc import Callable, Iterable, Iterator
- from concurrent.futures import Future, ThreadPoolExecutor
- from dataclasses import asdict, dataclass, field
- from datetime import datetime
- from functools import wraps
- from itertools import islice
- from pathlib import Path
- from typing import TYPE_CHECKING, Any, BinaryIO, Literal, TypeVar, overload
- from urllib.parse import quote, unquote
- import httpcore
- import httpx
- from tqdm.auto import tqdm as base_tqdm
- from tqdm.contrib.concurrent import thread_map
- from huggingface_hub.utils._xet import (
- XetTokenType,
- fetch_xet_connection_info_from_repo_info,
- reset_xet_connection_info_cache_for_repo,
- )
- from . import constants
- from ._buckets import (
- BucketFile,
- BucketFileMetadata,
- BucketFolder,
- BucketInfo,
- BucketUrl,
- SyncPlan,
- _BucketAddFile,
- _BucketCopyFile,
- _BucketDeleteFile,
- _split_bucket_id_and_prefix,
- sync_bucket_internal,
- )
- from ._commit_api import (
- CommitOperation,
- CommitOperationAdd,
- CommitOperationCopy,
- CommitOperationDelete,
- _fetch_files_to_copy,
- _fetch_upload_modes,
- _prepare_commit_payload,
- _upload_files,
- _warn_on_overwriting_operations,
- )
- from ._dataset_viewer import DatasetParquetEntry
- from ._eval_results import EvalResultEntry, parse_eval_result_entries
- from ._inference_endpoints import InferenceEndpoint, InferenceEndpointScalingMetric, InferenceEndpointType
- from ._jobs_api import JobHardware, JobInfo, JobSpec, ScheduledJobInfo, _create_job_spec
- from ._space_api import SpaceHardware, SpaceRuntime, SpaceSearchResult, SpaceStorage, SpaceVariable, Volume
- from ._upload_large_folder import upload_large_folder_internal
- from .community import (
- Discussion,
- DiscussionComment,
- DiscussionStatusChange,
- DiscussionTitleChange,
- DiscussionWithDetails,
- deserialize_event,
- )
- from .errors import (
- BadRequestError,
- EntryNotFoundError,
- GatedRepoError,
- HfHubHTTPError,
- LocalTokenNotFoundError,
- RemoteEntryNotFoundError,
- RepositoryNotFoundError,
- RevisionNotFoundError,
- XetAuthorizationError,
- XetRefreshTokenError,
- )
- from .file_download import DryRunFileInfo, HfFileMetadata, get_hf_file_metadata, hf_hub_url
- from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData
- from .utils import (
- DEFAULT_IGNORE_PATTERNS,
- NotASafetensorsRepoError,
- SafetensorsFileMetadata,
- SafetensorsParsingError,
- SafetensorsRepoMetadata,
- TensorInfo,
- are_progress_bars_disabled,
- build_hf_headers,
- chunk_iterable,
- experimental,
- filter_repo_objects,
- fix_hf_endpoint_in_url,
- get_session,
- get_token,
- hf_raise_for_status,
- http_backoff,
- logging,
- paginate,
- parse_datetime,
- parse_xet_file_data_from_response,
- refresh_xet_connection_info,
- silent_tqdm,
- validate_hf_hub_args,
- )
- from .utils import tqdm as hf_tqdm
- from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab
- from .utils._deprecation import _deprecate_arguments, _deprecate_method
- from .utils._http import _httpx_follow_relative_redirects_with_backoff
- from .utils._typing import CallableT
- from .utils._verification import collect_local_files, resolve_local_root, verify_maps
- from .utils.endpoint_helpers import _is_emission_within_threshold
- from .utils.tqdm import _get_progress_bar_context
- if TYPE_CHECKING:
- from .inference._providers import PROVIDER_T
- from .utils._verification import FolderVerification
- from .utils._xet_progress_reporting import XetProgressReporter
- R = TypeVar("R") # Return type
- CollectionItemType_T = Literal["model", "dataset", "space", "paper", "collection", "bucket"] # Closed set of item-type strings; the type only restricts static typing, nothing is validated at runtime here
- CollectionSort_T = Literal["lastModified", "trending", "upvotes"] # Closed set of sort-key strings for collections (note the camelCase "lastModified")
- RepoVisibility_T = Literal["public", "private", "protected"] # Type of the `visibility` argument and of the return value of `_resolve_repo_visibility`
- ExpandModelProperty_T = Literal[ # Closed set of model property names. Presumably the values accepted by an `expand`-style argument; TODO confirm against callers. Spellings mix camelCase, snake_case and a hyphen ("model-index"), so keep them verbatim.
- "author",
- "baseModels",
- "cardData",
- "childrenModelCount",
- "config",
- "createdAt",
- "disabled",
- "downloads",
- "downloadsAllTime",
- "evalResults",
- "gated",
- "gguf",
- "inference",
- "inferenceProviderMapping",
- "lastModified",
- "library_name",
- "likes",
- "mask_token",
- "model-index",
- "pipeline_tag",
- "private",
- "resourceGroup",
- "safetensors",
- "sha",
- "siblings",
- "spaces",
- "tags",
- "transformersInfo",
- "trendingScore",
- "usedStorage",
- "widgetData",
- ]
- ExpandDatasetProperty_T = Literal[ # Closed set of dataset property names. Presumably the dataset counterpart of `ExpandModelProperty_T`; TODO confirm against callers. Spellings are kept verbatim (e.g. snake_case "paperswithcode_id" next to camelCase "createdAt").
- "author",
- "cardData",
- "citation",
- "createdAt",
- "description",
- "disabled",
- "downloads",
- "downloadsAllTime",
- "gated",
- "lastModified",
- "likes",
- "paperswithcode_id",
- "private",
- "resourceGroup",
- "sha",
- "siblings",
- "tags",
- "trendingScore",
- "usedStorage",
- ]
- ExpandSpaceProperty_T = Literal[
- "author",
- "cardData",
- "createdAt",
- "datasets",
- "disabled",
- "lastModified",
- "likes",
- "models",
- "private",
- "resourceGroup",
- "runtime",
- "sdk",
- "sha",
- "siblings",
- "subdomain",
- "tags",
- "trendingScore",
- "usedStorage",
- ]
- ModelSort_T = Literal["created_at", "downloads", "last_modified", "likes", "trending_score"]
- DatasetSort_T = Literal["created_at", "downloads", "last_modified", "likes", "trending_score"]
- SpaceSort_T = Literal["created_at", "last_modified", "likes", "trending_score"]
- DailyPapersSort_T = Literal["publishedAt", "trending"]
- USERNAME_PLACEHOLDER = "hf_user"
- _REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")
- _REGEX_HTTP_PROTOCOL = re.compile(r"https?://")
- _CREATE_COMMIT_NO_REPO_ERROR_MESSAGE = (
- "\nNote: Creating a commit assumes that the repo already exists on the"
- " Huggingface Hub. Please use `create_repo` if it's not the case."
- )
- _AUTH_CHECK_NO_REPO_ERROR_MESSAGE = (
- "\nNote: The repository either does not exist or you do not have access rights."
- " Please check the repository ID and your access permissions."
- " If this is a private repository, ensure that your token is correct."
- )
- _BUCKET_PATHS_INFO_BATCH_SIZE = 1000
- _BUCKET_BATCH_ADD_CHUNK_SIZE = 1000
- _BUCKET_BATCH_DELETE_CHUNK_SIZE = 1000
- # Regex used to match special revisions with "/" in them (see #1710)
- SPECIAL_REFS_REVISION_REGEX = re.compile(
- r"""
- (^refs\/convert\/\w+) # `refs/convert/parquet` revisions
- |
- (^refs\/pr\/\d+) # PR revisions
- """,
- re.VERBOSE,
- )
# Module-level logger for this file.
logger = logging.get_logger(__name__)
- def _resolve_repo_visibility(
- *,
- private: bool | None,
- visibility: RepoVisibility_T | None,
- repo_type: str | None,
- ) -> RepoVisibility_T | None:
- if private is not None and visibility is not None:
- raise ValueError("Received both `private` and `visibility` arguments. Please provide only one of them.")
- if visibility is None:
- if private is None:
- return None
- return "private" if private else "public"
- if visibility == "protected" and repo_type != constants.REPO_TYPE_SPACE:
- raise ValueError("Only Spaces can be 'protected'. Please set visibility to 'public' or 'private'.")
- return visibility
def repo_type_and_id_from_hf_id(hf_id: str, hub_url: str | None = None) -> tuple[str | None, str | None, str]:
    """
    Returns the repo type and ID from a huggingface.co URL linking to a
    repository

    Args:
        hf_id (`str`):
            An URL or ID of a repository on the HF hub. Accepted values are:
            - https://huggingface.co/<repo_type>/<namespace>/<repo_id>
            - https://huggingface.co/<namespace>/<repo_id>
            - hf://<repo_type>/<namespace>/<repo_id>
            - hf://<namespace>/<repo_id>
            - <repo_type>/<namespace>/<repo_id>
            - <namespace>/<repo_id>
            - <repo_id>

            The `buckets` prefix is accepted wherever `<repo_type>` is accepted and is
            returned as the `"bucket"` repo type.
        hub_url (`str`, *optional*):
            The URL of the HuggingFace Hub, defaults to https://huggingface.co

    Returns:
        A tuple with three items: repo_type (`str` or `None`), namespace (`str` or
        `None`) and repo_id (`str`).

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If URL cannot be parsed.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is unknown.
    """
    # Keep the untouched input for error reporting.
    input_hf_id = hf_id

    # Get the hub_url (with or without protocol)
    full_hub_url = hub_url if hub_url is not None else constants.ENDPOINT
    hub_url_without_protocol = _REGEX_HTTP_PROTOCOL.sub("", full_hub_url)

    # Check if hf_id is a URL containing the hub_url (check both with and without protocol)
    hf_id_without_protocol = _REGEX_HTTP_PROTOCOL.sub("", hf_id)
    is_hf_url = hub_url_without_protocol in hf_id_without_protocol and "@" not in hf_id

    HFFS_PREFIX = "hf://"
    if hf_id.startswith(HFFS_PREFIX):  # Remove "hf://" prefix if exists
        hf_id = hf_id[len(HFFS_PREFIX) :]

    # If it's a URL, strip the endpoint prefix to get the path
    if is_hf_url:
        # Remove protocol if present
        hf_id_normalized = _REGEX_HTTP_PROTOCOL.sub("", hf_id)
        # Remove the hub_url prefix to get the relative path
        if hf_id_normalized.startswith(hub_url_without_protocol):
            # Strip the hub URL and any leading slashes
            hf_id = hf_id_normalized[len(hub_url_without_protocol) :].lstrip("/")

    url_segments = hf_id.split("/")
    is_hf_id = len(url_segments) <= 3

    namespace: str | None
    if is_hf_url:
        # For URLs, we need to extract repo_type, namespace, repo_id
        # Expected format after stripping endpoint: [repo_type]/namespace/repo_id or namespace/repo_id
        if len(url_segments) >= 3:
            # Check if first segment is a repo type
            if url_segments[0] in constants.REPO_TYPES_MAPPING:
                repo_type = constants.REPO_TYPES_MAPPING[url_segments[0]]
                namespace = url_segments[1]
                repo_id = url_segments[2]
            elif url_segments[0] == "buckets":
                # Special case for buckets
                repo_type = "bucket"
                namespace = url_segments[1]
                repo_id = url_segments[2]
            else:
                # First segment is namespace (extra segments, if any, are ignored)
                namespace = url_segments[0]
                repo_id = url_segments[1]
                repo_type = None
        elif len(url_segments) == 2:
            namespace = url_segments[0]
            repo_id = url_segments[1]
            # Check if namespace is actually a repo type mapping
            if namespace in constants.REPO_TYPES_MAPPING:
                # Mean canonical dataset or model
                repo_type = constants.REPO_TYPES_MAPPING[namespace]
                namespace = None
            elif namespace == "buckets":
                # Special case for buckets
                repo_type = "bucket"
                namespace = None
            else:
                repo_type = None
        else:
            # Single segment
            repo_id = url_segments[0]
            namespace = None
            repo_type = None
    elif is_hf_id:
        if len(url_segments) == 3:
            # Passed <repo_type>/<user>/<model_id> or <repo_type>/<org>/<model_id>
            repo_type, namespace, repo_id = url_segments[-3:]
            if repo_type == "buckets":
                # Special case for buckets, consistent with the URL branch and the
                # two-segment branch: without it, the plural prefix is rejected below
                # as an unknown repo type.
                repo_type = "bucket"
        elif len(url_segments) == 2:
            if url_segments[0] in constants.REPO_TYPES_MAPPING:
                # Passed '<model_id>' or 'datasets/<dataset_id>' for a canonical model or dataset
                repo_type = constants.REPO_TYPES_MAPPING[url_segments[0]]
                namespace = None
                repo_id = hf_id.split("/")[-1]
            elif url_segments[0] == "buckets":
                # Special case for buckets
                repo_type = "bucket"
                namespace = None
                repo_id = hf_id.split("/")[-1]
            else:
                # Passed <user>/<model_id> or <org>/<model_id>
                namespace, repo_id = hf_id.split("/")[-2:]
                repo_type = None
        else:
            # Passed <model_id>
            repo_id = url_segments[0]
            namespace, repo_type = None, None
    else:
        raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}")

    # Check if repo type is known (mapping "spaces" => "space" + empty value => `None`)
    if repo_type in constants.REPO_TYPES_MAPPING:
        repo_type = constants.REPO_TYPES_MAPPING[repo_type]
    if repo_type == "":
        repo_type = None
    if repo_type not in constants.REPO_TYPES_WITH_KERNEL and repo_type != "bucket":
        raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{input_hf_id}')")

    return repo_type, namespace, repo_id
def _parse_hf_copy_handle(hf_handle: str) -> _BucketCopyHandle | _RepoCopyHandle:
    """Parse an `hf://` handle into a bucket handle or a repo handle.

    Handles starting with `hf://buckets/` are split with `_split_bucket_id_and_prefix` and
    returned as a `_BucketCopyHandle`. Any other handle is parsed as
    `hf://[<repo_type>/]<namespace>/<repo_name>[@<revision>][/<path>]` and returned as a
    `_RepoCopyHandle`. The repo type defaults to `constants.REPO_TYPE_MODEL` and the revision
    to `constants.DEFAULT_REVISION`.

    The revision is URL-decoded. Revisions containing a "/" that match
    `SPECIAL_REFS_REVISION_REGEX` (e.g. `refs/pr/1`, `refs/convert/parquet`) are recognized
    both when URL-encoded and when written with plain slashes.

    Args:
        hf_handle (`str`):
            Handle to parse. Must start with `hf://`.

    Returns:
        A `_BucketCopyHandle` or a `_RepoCopyHandle`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the handle does not start with `hf://`, is empty, or does not contain at least
            `<namespace>/<repo_id>`.
    """
    # TODO: Harmonize hf:// parsing. See https://github.com/huggingface/huggingface_hub/issues/3971
    if not hf_handle.startswith("hf://"):
        raise ValueError(f"Invalid HF handle: '{hf_handle}'. Expected a path starting with 'hf://'.")

    path = hf_handle.removeprefix("hf://")

    if path.startswith("buckets/"):
        bucket_id, bucket_path = _split_bucket_id_and_prefix(path.removeprefix("buckets/"))
        return _BucketCopyHandle(
            bucket_id=bucket_id,
            path=bucket_path.strip("/"),
        )

    path = path.strip("/")
    if path == "":
        raise ValueError(f"Invalid HF handle: '{hf_handle}'.")

    parts = path.split("/")
    repo_type: str = constants.REPO_TYPE_MODEL
    if parts[0] in constants.REPO_TYPES_MAPPING:
        # Explicit repo type prefix (e.g. "datasets/")
        repo_type = constants.REPO_TYPES_MAPPING[parts[0]]
        parts = parts[1:]

    if len(parts) < 2:
        raise ValueError(
            f"Invalid repo HF handle: '{hf_handle}'. Expected format 'hf://<namespace>/<repo_id>/path' or with explicit repo type prefix."
        )

    namespace, repo_name_with_revision = parts[0], parts[1]
    remaining_parts = parts[2:]

    revision: str | None = None
    if "@" in repo_name_with_revision:
        repo_name, revision = repo_name_with_revision.split("@", 1)
    else:
        repo_name = repo_name_with_revision

    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = unquote(revision)
        if remaining_parts:
            # A special ref spans several "/"-separated segments ("refs/pr/<num>",
            # "refs/convert/<name>"). Match against the revision followed by ALL remaining
            # segments: joining only the first one yields e.g. "refs/pr", which can never
            # match and would leave "refs" as the revision.
            candidate = "/".join([revision, *remaining_parts])
            match = SPECIAL_REFS_REVISION_REGEX.match(candidate)
            if match is not None:
                revision = match.group()
                suffix = candidate[len(revision) :].lstrip("/")
                remaining_parts = suffix.split("/") if suffix else []

    repo_path = "/".join(remaining_parts).strip("/")
    return _RepoCopyHandle(
        repo_type=repo_type,  # type: ignore
        repo_id=f"{namespace}/{repo_name}",
        revision=revision,
        path=repo_path,
    )
@dataclass
class LastCommitInfo(dict):
    """Last-commit metadata, readable through attributes and as a plain `dict`.

    Attributes:
        oid (`str`): Commit identifier.
        title (`str`): Commit title.
        date (`datetime`): Commit date.
    """

    oid: str
    title: str
    date: datetime

    def __post_init__(self):
        # Backward compatibility hack: expose every dataclass field as a dict entry too.
        mirrored_fields = asdict(self)
        self.update(mirrored_fields)
@dataclass
class BlobLfsInfo(dict):
    """LFS metadata of a file, readable through attributes and as a plain `dict`.

    Attributes:
        size (`int`): Size value of the LFS entry.
        sha256 (`str`): SHA-256 value of the LFS entry.
        pointer_size (`int`): Pointer size value of the LFS entry.
    """

    size: int
    sha256: str
    pointer_size: int

    def __post_init__(self):
        # Backward compatibility hack: expose every dataclass field as a dict entry too.
        mirrored_fields = asdict(self)
        self.update(mirrored_fields)
@dataclass
class BlobSecurityInfo(dict):
    """Security scan metadata of a file, readable through attributes and as a plain `dict`.

    Attributes:
        safe (`bool`): Duplicates the information of `status`; kept for backward compatibility.
        status (`str`): Scan status.
        av_scan (`dict`, *optional*): Payload stored under `av_scan`.
        pickle_import_scan (`dict`, *optional*): Payload stored under `pickle_import_scan`.
    """

    safe: bool  # duplicate information with "status" field, keeping it for backward compatibility
    status: str
    av_scan: dict | None
    pickle_import_scan: dict | None

    def __post_init__(self):
        # Backward compatibility hack for BlobSecurityInfo: expose every field as a dict entry too.
        mirrored_fields = asdict(self)
        self.update(mirrored_fields)
@dataclass
class TransformersInfo(dict):
    """Transformers-related metadata, readable through attributes and as a plain `dict`.

    Attributes:
        auto_model (`str`): Required value.
        custom_class (`str`, *optional*): Defaults to `None`.
        pipeline_tag (`str`, *optional*): Defaults to `None`.
        processor (`str`, *optional*): Defaults to `None`.
    """

    auto_model: str
    custom_class: str | None = None
    # possible `pipeline_tag` values: https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72
    pipeline_tag: str | None = None
    processor: str | None = None

    def __post_init__(self):
        # Backward compatibility hack: expose every dataclass field as a dict entry too.
        mirrored_fields = asdict(self)
        self.update(mirrored_fields)
@dataclass
class SafeTensorsInfo(dict):
    """Safetensors metadata, readable through attributes and as a plain `dict`.

    Attributes:
        parameters (`dict[str, int]`): Mapping from string keys to integer values.
        total (`int`): Total value.
    """

    parameters: dict[str, int]
    total: int

    def __post_init__(self):
        # Backward compatibility hack: expose every dataclass field as a dict entry too.
        mirrored_fields = asdict(self)
        self.update(mirrored_fields)
@dataclass
class CommitInfo(str):
    """Data structure containing information about a newly created commit.

    Returned by any method that creates a commit on the Hub: [`create_commit`], [`upload_file`], [`upload_folder`],
    [`delete_file`], [`delete_folder`]. It inherits from `str` for backward compatibility but using methods specific
    to `str` is deprecated. The string value of an instance is its `commit_url`.

    Attributes:
        commit_url (`str`):
            Url where to find the commit.

        commit_message (`str`):
            The summary (first line) of the commit that has been created.

        commit_description (`str`):
            Description of the commit that has been created. Can be empty.

        oid (`str`):
            Commit hash id. Example: `"91c54ad1727ee830252e457677f467be0bfd8a57"`.

        pr_url (`str`, *optional*):
            Url to the PR that has been created, if any. Populated when `create_pr=True`
            is passed.

        pr_revision (`str`, *optional*):
            Revision of the PR that has been created, if any. Populated when
            `create_pr=True` is passed. Example: `"refs/pr/1"`.

        pr_num (`int`, *optional*):
            Number of the PR discussion that has been created, if any. Populated when
            `create_pr=True` is passed. Can be passed as `discussion_num` in
            [`get_discussion_details`]. Example: `1`.

        repo_url (`RepoUrl`):
            Repo URL of the commit containing info like repo_id, repo_type, etc.
    """

    commit_url: str
    commit_message: str
    commit_description: str
    oid: str
    _endpoint: str | None = field(default=None, repr=False)
    pr_url: str | None = None

    # Derived from `commit_url` in `__post_init__`
    repo_url: RepoUrl = field(init=False)

    # Derived from `pr_url` in `__post_init__`
    pr_revision: str | None = field(init=False)
    pr_num: int | None = field(init=False)

    def __new__(cls, *args, commit_url: str, **kwargs):
        # The underlying `str` value is the commit URL; every other argument is handled by `__init__`.
        return str.__new__(cls, commit_url)

    def __post_init__(self):
        """Fill in the repo and PR related fields once the dataclass fields are set.

        See https://docs.python.org/3.10/library/dataclasses.html#post-init-processing.
        """
        # Repo info: everything located before "/commit/" in the commit URL
        repo_part = self.commit_url.split("/commit/")[0]
        self.repo_url = RepoUrl(repo_part, endpoint=self._endpoint)

        # PR info
        if self.pr_url is None:
            self.pr_revision = None
            self.pr_num = None
            return
        self.pr_revision = _parse_revision_from_pr_url(self.pr_url)
        # The PR number is the last "/"-separated item of the revision
        self.pr_num = int(self.pr_revision.split("/")[-1])
@dataclass
class AccessRequest:
    """Record describing one user access request.

    Attributes:
        username (`str`):
            Username of the user who requested access.
        fullname (`str`):
            Fullname of the user who requested access.
        email (`Optional[str]`):
            Email of the user who requested access.
            Can only be `None` in the /accepted list if the user was granted access manually.
        timestamp (`datetime`):
            Timestamp of the request.
        status (`Literal["pending", "accepted", "rejected"]`):
            Status of the request. Can be one of `["pending", "accepted", "rejected"]`.
        fields (`dict[str, Any]`, *optional*):
            Additional fields filled by the user in the gate form. Defaults to `None`.
    """

    username: str
    fullname: str
    email: str | None
    timestamp: datetime
    status: Literal["pending", "accepted", "rejected"]

    # Extra answers provided by the user in the gate form
    fields: dict[str, Any] | None = None
@dataclass
class WebhookWatchedItem:
    """Record describing one item watched by a webhook.

    Attributes:
        type (`Literal["dataset", "model", "org", "space", "user"]`):
            Type of the item to be watched. Can be one of `["dataset", "model", "org", "space", "user"]`.
        name (`str`):
            Name of the item to be watched. Can be the username, organization name, model name, dataset name or space name.
    """

    type: Literal["dataset", "model", "org", "space", "user"]
    name: str
@dataclass
class WebhookInfo:
    """Record describing a webhook.

    One of `url` or `job` is specified, but not both.

    Attributes:
        id (`str`):
            ID of the webhook.
        url (`str`, *optional*):
            URL of the webhook.
        job (`JobSpec`, *optional*):
            Specifications of the Job to trigger.
        watched (`list[WebhookWatchedItem]`):
            List of items watched by the webhook, see [`WebhookWatchedItem`].
        domains (`list[WEBHOOK_DOMAIN_T]`):
            List of domains the webhook is watching. Can be one of `["repo", "discussions"]`.
        secret (`str`, *optional*):
            Secret of the webhook.
        disabled (`bool`):
            Whether the webhook is disabled or not.
    """

    # Every field is required at construction time (no defaults).
    id: str
    url: str | None
    job: JobSpec | None
    watched: list[WebhookWatchedItem]
    domains: list[constants.WEBHOOK_DOMAIN_T]
    secret: str | None
    disabled: bool
class RepoUrl(str):
    """Subclass of `str` describing a repo URL on the Hub.

    `RepoUrl` is returned by `HfApi.create_repo`. It inherits from `str` for backward
    compatibility. At initialization, the URL is parsed to populate properties:
    - endpoint (`str`)
    - namespace (`Optional[str]`)
    - repo_name (`str`)
    - repo_id (`str`)
    - repo_type (`Literal["model", "dataset", "space"]`)
    - url (`str`)

    Args:
        url (`Any`):
            String value of the repo url.
        endpoint (`str`, *optional*):
            Endpoint of the Hub. Defaults to <https://huggingface.co>.

    Example:
    ```py
    >>> RepoUrl('https://huggingface.co/gpt2')
    RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2')

    >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co')
    RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset')

    >>> RepoUrl('hf://datasets/my-user/my-dataset')
    RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='my-user/my-dataset')

    >>> HfApi.create_repo("dummy_model")
    RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model')
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If URL cannot be parsed.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is unknown.
    """

    def __new__(cls, url: Any, endpoint: str | None = None):
        # The stored string value is the URL after `fix_hf_endpoint_in_url` has been applied.
        fixed_url = fix_hf_endpoint_in_url(url, endpoint=endpoint)
        return super().__new__(cls, fixed_url)

    def __init__(self, url: Any, endpoint: str | None = None) -> None:
        super().__init__()

        # Parse the string value of the instance (not the raw `url` argument)
        self.endpoint = endpoint or constants.ENDPOINT
        parsed_type, parsed_namespace, parsed_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint)

        # Populate fields
        self.namespace = parsed_namespace
        self.repo_name = parsed_name
        if parsed_namespace is None:
            self.repo_id = parsed_name
        else:
            self.repo_id = f"{parsed_namespace}/{parsed_name}"
        # A missing repo type falls back to the model repo type
        self.repo_type = parsed_type or constants.REPO_TYPE_MODEL
        self.url = str(self)  # just in case it's needed

    def __repr__(self) -> str:
        return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')"
- @dataclass(frozen=True)
- class _BucketCopyHandle:
- bucket_id: str
- path: str
- @dataclass(frozen=True)
- class _RepoCopyHandle:
- repo_type: Literal["model", "dataset", "space"]
- repo_id: str
- revision: str
- path: str
@dataclass
class RepoSibling:
    """
    Basic information about one file of a repo on the Hub.

    > [!TIP]
    > All attributes of this class are optional except `rfilename`. This is because only the file names are returned when
    > listing repositories on the Hub (with [`list_models`], [`list_datasets`] or [`list_spaces`]). If you need more
    > information like file size, blob id or lfs details, you must request them specifically from one repo at a time
    > (using [`model_info`], [`dataset_info`] or [`space_info`]) as it adds more constraints on the backend server to
    > retrieve these.

    Attributes:
        rfilename (str):
            file name, relative to the repo root.
        size (`int`, *optional*):
            The file's size, in bytes. This attribute is defined when `files_metadata` argument of [`repo_info`] is set
            to `True`. It's `None` otherwise.
        blob_id (`str`, *optional*):
            The file's git OID. This attribute is defined when `files_metadata` argument of [`repo_info`] is set to
            `True`. It's `None` otherwise.
        lfs (`BlobLfsInfo`, *optional*):
            The file's LFS metadata. This attribute is defined when `files_metadata` argument of [`repo_info`] is set to
            `True` and the file is stored with Git LFS. It's `None` otherwise.
    """

    # Only required field
    rfilename: str

    # Optional details, `None` by default
    size: int | None = None
    blob_id: str | None = None
    lfs: BlobLfsInfo | None = None
@dataclass
class RepoFile:
    """
    Information about one file on the Hub, built from a raw keyword payload.

    Attributes:
        path (str):
            file path relative to the repo root.
        size (`int`):
            The file's size, in bytes.
        blob_id (`str`):
            The file's git OID.
        lfs (`BlobLfsInfo`, *optional*):
            The file's LFS metadata.
        xet_hash (`str`, *optional*):
            The file's Xet hash.
        last_commit (`LastCommitInfo`, *optional*):
            The file's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`]
            are called with `expand=True`.
        security (`BlobSecurityInfo`, *optional*):
            The file's security scan metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`]
            are called with `expand=True`.
    """

    path: str
    size: int
    blob_id: str
    lfs: BlobLfsInfo | None = None
    xet_hash: str | None = None
    last_commit: LastCommitInfo | None = None
    security: BlobSecurityInfo | None = None

    def __init__(self, **kwargs):
        # Mandatory keys: a missing one raises `KeyError`. The blob id is read from "oid".
        self.path = kwargs.pop("path")
        self.size = kwargs.pop("size")
        self.blob_id = kwargs.pop("oid")

        # LFS payload: "oid" -> sha256, "pointerSize" -> pointer_size
        lfs_payload = kwargs.pop("lfs", None)
        if lfs_payload is None:
            self.lfs = None
        else:
            self.lfs = BlobLfsInfo(
                size=lfs_payload["size"],
                sha256=lfs_payload["oid"],
                pointer_size=lfs_payload["pointerSize"],
            )

        self.xet_hash = kwargs.pop("xetHash", None)

        # Last commit payload, accepted under "lastCommit" or "last_commit"
        commit_payload = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None)
        if commit_payload is None:
            self.last_commit = None
        else:
            self.last_commit = LastCommitInfo(
                oid=commit_payload["id"],
                title=commit_payload["title"],
                date=parse_datetime(commit_payload["date"]),
            )

        # Security payload: `safe` is derived from the "status" value
        scan_payload = kwargs.pop("securityFileStatus", None)
        if scan_payload is None:
            self.security = None
        else:
            self.security = BlobSecurityInfo(
                safe=scan_payload["status"] == "safe",
                status=scan_payload["status"],
                av_scan=scan_payload["avScan"],
                pickle_import_scan=scan_payload["pickleImportScan"],
            )

        # backwards compatibility: legacy attribute names
        self.rfilename = self.path
        self.lastCommit = self.last_commit
@dataclass
class RepoFolder:
    """
    Information about one folder on the Hub, built from a raw keyword payload.

    Attributes:
        path (str):
            folder path relative to the repo root.
        tree_id (`str`):
            The folder's git OID.
        last_commit (`LastCommitInfo`, *optional*):
            The folder's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`]
            are called with `expand=True`.
    """

    path: str
    tree_id: str
    last_commit: LastCommitInfo | None = None

    def __init__(self, **kwargs):
        # Mandatory keys: a missing one raises `KeyError`. The tree id is read from "oid".
        self.path = kwargs.pop("path")
        self.tree_id = kwargs.pop("oid")

        # Last commit payload, accepted under "lastCommit" or "last_commit"
        commit_payload = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None)
        if commit_payload is None:
            self.last_commit = None
        else:
            self.last_commit = LastCommitInfo(
                oid=commit_payload["id"],
                title=commit_payload["title"],
                date=parse_datetime(commit_payload["date"]),
            )
@dataclass
class InferenceProviderMapping:
    """Mapping entry between a Hub model and a provider, built from a raw keyword payload.

    Required keys are `provider`, `hf_model_id`, `providerId`, `status` and `task`. The keys
    `adapter`, `adapterWeightsPath` and `type` are optional. Any other key is stored as-is as
    an instance attribute.
    """

    provider: PROVIDER_T  # Provider name
    hf_model_id: str  # ID of the model on the Hugging Face Hub
    provider_id: str  # ID of the model on the provider's side
    status: Literal["error", "live", "staging"]
    task: str
    adapter: str | None = None
    adapter_weights_path: str | None = None
    type: Literal["single-model", "tag-filter"] | None = None

    def __init__(self, **kwargs):
        # (attribute name, payload key) pairs; a missing required key raises `KeyError`
        required_pairs = (
            ("provider", "provider"),
            ("hf_model_id", "hf_model_id"),
            ("provider_id", "providerId"),
            ("status", "status"),
            ("task", "task"),
        )
        for attribute, key in required_pairs:
            setattr(self, attribute, kwargs.pop(key))

        # Optional entries default to `None`
        optional_pairs = (
            ("adapter", "adapter"),
            ("adapter_weights_path", "adapterWeightsPath"),
            ("type", "type"),
        )
        for attribute, key in optional_pairs:
            setattr(self, attribute, kwargs.pop(key, None))

        # Keep whatever is left as extra attributes
        self.__dict__.update(**kwargs)
@dataclass
class ModelInfo:
    """
    Contains information about a model on the Hub. This object is returned by [`model_info`] and [`list_models`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing models
    > using [`list_models`] only a subset of the attributes are returned.

    Instances are built from a raw keyword payload. Only `id` is required. Keys that are not recognized are
    stored as-is as instance attributes.

    Attributes:
        id (`str`):
            ID of model.
        author (`str`, *optional*):
            Author of the model.
        base_models (`list[str]`, *optional*):
            List of base models this model is derived from.
        card_data (`ModelCardData`, *optional*):
            Model Card Metadata as a [`huggingface_hub.repocard_data.ModelCardData`] object.
        children_model_count (`int`, *optional*):
            Number of children models derived from this model.
        config (`dict`, *optional*):
            Model configuration.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        disabled (`bool`, *optional*):
            Is the repo disabled.
        downloads (`int`):
            Number of downloads of the model over the last 30 days.
        downloads_all_time (`int`):
            Cumulated number of downloads of the model since its creation.
        eval_results (`list[EvalResultEntry]`, *optional*):
            Model's evaluation results.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        gguf (`dict`, *optional*):
            GGUF information of the model.
        inference (`Literal["warm"]`, *optional*):
            Status of the model on Inference Providers. Warm if the model is served by at least one provider.
        inference_provider_mapping (`list[InferenceProviderMapping]`, *optional*):
            A list of [`InferenceProviderMapping`] ordered after the user's provider order.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        library_name (`str`, *optional*):
            Library associated with the model.
        likes (`int`):
            Number of likes of the model.
        mask_token (`str`, *optional*):
            Mask token used by the model.
        model_index (`dict`, *optional*):
            Model index for evaluation.
        pipeline_tag (`str`, *optional*):
            Pipeline tag associated with the model.
        private (`bool`):
            Is the repo private.
        resource_group (`dict`, *optional*):
            Resource group information for the model.
        safetensors (`SafeTensorsInfo`, *optional*):
            Model's safetensors information.
        security_repo_status (`dict`, *optional*):
            Model's security scan status.
        sha (`str`, *optional*):
            Repo SHA at this particular revision.
        siblings (`list[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the model.
        spaces (`list[str]`, *optional*):
            List of spaces using the model.
        tags (`list[str]`):
            List of tags of the model. Compared to `card_data.tags`, contains extra tags computed by the Hub
            (e.g. supported libraries, model's arXiv).
        transformers_info (`TransformersInfo`, *optional*):
            Transformers-specific info (auto class, processor, etc.) associated with the model.
        trending_score (`int`, *optional*):
            Trending score of the model.
        used_storage (`int`, *optional*):
            Size in bytes of the model on the Hub.
        widget_data (`Any`, *optional*):
            Widget data associated with the model.
    """

    id: str
    author: str | None
    base_models: list[str] | None
    card_data: ModelCardData | None
    children_model_count: int | None
    config: dict | None
    created_at: datetime | None
    disabled: bool | None
    downloads: int | None
    downloads_all_time: int | None
    eval_results: list[EvalResultEntry] | None
    gated: Literal["auto", "manual", False] | None
    gguf: dict | None
    inference: Literal["warm"] | None
    inference_provider_mapping: list[InferenceProviderMapping] | None
    last_modified: datetime | None
    library_name: str | None
    likes: int | None
    mask_token: str | None
    model_index: dict | None
    pipeline_tag: str | None
    private: bool | None
    resource_group: dict | None
    safetensors: SafeTensorsInfo | None
    security_repo_status: dict | None
    sha: str | None
    siblings: list[RepoSibling] | None
    spaces: list[str] | None
    tags: list[str] | None
    transformers_info: TransformersInfo | None
    trending_score: int | None
    used_storage: int | None
    widget_data: Any | None

    @staticmethod
    def _pop_aliased(kwargs: dict, primary: str, alias: str) -> Any:
        """Pop both spellings of a key and return the primary value if truthy, else the alias value.

        Both keys are always removed from `kwargs`. With a short-circuiting
        `pop(primary) or pop(alias)`, the alias key stays in `kwargs` whenever the primary value
        is truthy, and the final `__dict__.update(**kwargs)` of `__init__` then overwrites the
        parsed attribute with the raw alias value.
        """
        primary_value = kwargs.pop(primary, None)
        alias_value = kwargs.pop(alias, None)
        return primary_value or alias_value

    def __init__(self, **kwargs):
        self.id = kwargs.pop("id")
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)
        last_modified = self._pop_aliased(kwargs, "lastModified", "last_modified")
        self.last_modified = parse_datetime(last_modified) if last_modified else None
        created_at = self._pop_aliased(kwargs, "createdAt", "created_at")
        self.created_at = parse_datetime(created_at) if created_at else None
        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.downloads = kwargs.pop("downloads", None)
        self.downloads_all_time = kwargs.pop("downloadsAllTime", None)
        self.likes = kwargs.pop("likes", None)
        self.library_name = kwargs.pop("library_name", None)
        self.gguf = kwargs.pop("gguf", None)

        self.inference = kwargs.pop("inference", None)

        # little hack to simplify Inference Providers logic and make it backward and forward compatible
        # right now, API returns a dict on model_info and a list on list_models. Let's harmonize to list.
        mapping = kwargs.pop("inferenceProviderMapping", None)
        if isinstance(mapping, list):
            self.inference_provider_mapping = [
                InferenceProviderMapping(**{**value, "hf_model_id": self.id}) for value in mapping
            ]
        elif isinstance(mapping, dict):
            # In the dict form, the provider name is the key of each entry
            self.inference_provider_mapping = [
                InferenceProviderMapping(**{**value, "hf_model_id": self.id, "provider": provider})
                for provider, value in mapping.items()
            ]
        elif mapping is None:
            self.inference_provider_mapping = None
        else:
            raise ValueError(
                f"Unexpected type for `inferenceProviderMapping`. Expecting `dict` or `list`. Got {mapping}."
            )

        self.tags = kwargs.pop("tags", None)
        self.pipeline_tag = kwargs.pop("pipeline_tag", None)
        self.mask_token = kwargs.pop("mask_token", None)
        self.trending_score = kwargs.pop("trendingScore", None)
        self.used_storage = kwargs.pop("usedStorage", None)

        card_data = self._pop_aliased(kwargs, "cardData", "card_data")
        # A dict payload is converted to `ModelCardData`; any other value is stored unchanged
        self.card_data = (
            ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )

        self.widget_data = kwargs.pop("widgetData", None)
        self.model_index = self._pop_aliased(kwargs, "model-index", "model_index")
        self.config = kwargs.pop("config", None)
        transformers_info = self._pop_aliased(kwargs, "transformersInfo", "transformers_info")
        self.transformers_info = TransformersInfo(**transformers_info) if transformers_info else None
        siblings = kwargs.pop("siblings", None)
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )
        self.spaces = kwargs.pop("spaces", None)
        safetensors = kwargs.pop("safetensors", None)
        self.safetensors = (
            SafeTensorsInfo(
                parameters=safetensors["parameters"],
                total=safetensors["total"],
            )
            if safetensors
            else None
        )
        self.security_repo_status = kwargs.pop("securityRepoStatus", None)
        eval_results = kwargs.pop("evalResults", None)
        self.eval_results = parse_eval_result_entries(eval_results) if eval_results else None
        self.base_models = kwargs.pop("baseModels", None)
        self.children_model_count = kwargs.pop("childrenModelCount", None)
        self.resource_group = kwargs.pop("resourceGroup", None)

        # backwards compatibility
        self.lastModified = self.last_modified
        self.cardData = self.card_data
        self.transformersInfo = self.transformers_info
        # Any key that was not consumed above becomes an instance attribute
        self.__dict__.update(**kwargs)
@dataclass
class DatasetInfo:
    """
    Contains information about a dataset on the Hub. This object is returned by [`dataset_info`] and [`list_datasets`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing datasets
    > using [`list_datasets`] only a subset of the attributes are returned.

    Attributes:
        id (`str`):
            ID of dataset.
        author (`str`):
            Author of the dataset.
        card_data (`DatasetCardData`, *optional*):
            Dataset Card Metadata as a [`huggingface_hub.repocard_data.DatasetCardData`] object.
        citation (`str`, *optional*):
            Citation information for the dataset.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        description (`str`, *optional*):
            Description of the dataset.
        disabled (`bool`, *optional*):
            Is the repo disabled.
        downloads (`int`):
            Number of downloads of the dataset over the last 30 days.
        downloads_all_time (`int`):
            Cumulated number of downloads of the dataset since its creation.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        likes (`int`):
            Number of likes of the dataset.
        paperswithcode_id (`str`, *optional*):
            Papers with code ID of the dataset.
        private (`bool`):
            Is the repo private.
        resource_group (`dict`, *optional*):
            Resource group information for the dataset.
        sha (`str`):
            Repo SHA at this particular revision.
        siblings (`list[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the dataset.
        tags (`list[str]`):
            List of tags of the dataset.
        trending_score (`int`, *optional*):
            Trending score of the dataset.
        used_storage (`int`, *optional*):
            Size in bytes of the dataset on the Hub.
    """

    id: str
    author: str | None
    card_data: DatasetCardData | None
    citation: str | None
    created_at: datetime | None
    description: str | None
    disabled: bool | None
    downloads: int | None
    downloads_all_time: int | None
    gated: Literal["auto", "manual", False] | None
    last_modified: datetime | None
    likes: int | None
    paperswithcode_id: str | None
    private: bool | None
    resource_group: dict | None
    sha: str | None
    siblings: list[RepoSibling] | None
    tags: list[str] | None
    trending_score: int | None
    used_storage: int | None

    def __init__(self, **kwargs):
        """
        Build the object from a raw key/value payload.

        Only `id` is required. `createdAt`/`created_at`, `lastModified`/`last_modified` and
        `cardData`/`card_data` are accepted as aliases; the camelCase spelling wins when both
        are given. Any key that is not consumed here is attached to the instance as-is.

        Raises:
            `KeyError`: If `id` is missing.
        """
        self.id = kwargs.pop("id")
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)

        # Both spellings are popped unconditionally. With a short-circuiting
        # `pop(camel) or pop(snake)`, the snake_case key would survive in `kwargs` whenever the
        # camelCase one is set, and the final `__dict__.update` would overwrite the parsed
        # attribute with the raw value.
        created_at_camel = kwargs.pop("createdAt", None)
        created_at_snake = kwargs.pop("created_at", None)
        created_at = created_at_camel or created_at_snake
        self.created_at = parse_datetime(created_at) if created_at else None

        last_modified_camel = kwargs.pop("lastModified", None)
        last_modified_snake = kwargs.pop("last_modified", None)
        last_modified = last_modified_camel or last_modified_snake
        self.last_modified = parse_datetime(last_modified) if last_modified else None

        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.downloads = kwargs.pop("downloads", None)
        self.downloads_all_time = kwargs.pop("downloadsAllTime", None)
        self.likes = kwargs.pop("likes", None)
        self.paperswithcode_id = kwargs.pop("paperswithcode_id", None)
        self.tags = kwargs.pop("tags", None)
        self.trending_score = kwargs.pop("trendingScore", None)
        self.used_storage = kwargs.pop("usedStorage", None)

        card_data_camel = kwargs.pop("cardData", None)
        card_data_snake = kwargs.pop("card_data", None)
        card_data = card_data_camel or card_data_snake
        # A dict is wrapped into a `DatasetCardData`; anything else is stored untouched.
        self.card_data = (
            DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )

        siblings = kwargs.pop("siblings", None)
        # `None` (key absent) is kept distinct from an empty list (no files).
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )

        self.citation = kwargs.pop("citation", None)
        self.description = kwargs.pop("description", None)
        self.resource_group = kwargs.pop("resourceGroup", None)

        # backwards compatibility: legacy camelCase attribute names
        self.lastModified = self.last_modified
        self.cardData = self.card_data

        # Expose every remaining (unknown) key as an attribute.
        self.__dict__.update(**kwargs)
@dataclass
class SpaceInfo:
    """
    Contains information about a Space on the Hub. This object is returned by [`space_info`] and [`list_spaces`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing spaces
    > using [`list_spaces`] only a subset of the attributes are returned.

    Attributes:
        id (`str`):
            ID of the Space.
        author (`str`, *optional*):
            Author of the Space.
        card_data (`SpaceCardData`, *optional*):
            Space Card Metadata as a [`huggingface_hub.repocard_data.SpaceCardData`] object.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        datasets (`list[str]`, *optional*):
            List of datasets used by the Space.
        disabled (`bool`, *optional*):
            Is the Space disabled.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        host (`str`, *optional*):
            Host URL of the Space.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        likes (`int`):
            Number of likes of the Space.
        models (`list[str]`, *optional*):
            List of models used by the Space.
        private (`bool`):
            Is the repo private.
        resource_group (`dict`, *optional*):
            Resource group information for the Space.
        runtime (`SpaceRuntime`, *optional*):
            Space runtime information as a [`huggingface_hub.hf_api.SpaceRuntime`] object.
        sdk (`str`, *optional*):
            SDK used by the Space.
        sha (`str`, *optional*):
            Repo SHA at this particular revision.
        siblings (`list[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the Space.
        subdomain (`str`, *optional*):
            Subdomain of the Space.
        tags (`list[str]`):
            List of tags of the Space.
        trending_score (`int`, *optional*):
            Trending score of the Space.
        used_storage (`int`, *optional*):
            Size in bytes of the Space on the Hub.
    """

    id: str
    author: str | None
    card_data: SpaceCardData | None
    created_at: datetime | None
    datasets: list[str] | None
    disabled: bool | None
    gated: Literal["auto", "manual", False] | None
    host: str | None
    last_modified: datetime | None
    likes: int | None
    models: list[str] | None
    private: bool | None
    resource_group: dict | None
    runtime: SpaceRuntime | None
    sdk: str | None
    sha: str | None
    siblings: list[RepoSibling] | None
    subdomain: str | None
    tags: list[str] | None
    trending_score: int | None
    used_storage: int | None

    def __init__(self, **kwargs):
        """
        Build the object from a raw key/value payload.

        Only `id` is required. `createdAt`/`created_at`, `lastModified`/`last_modified` and
        `cardData`/`card_data` are accepted as aliases; the camelCase spelling wins when both
        are given. Any key that is not consumed here is attached to the instance as-is.

        Raises:
            `KeyError`: If `id` is missing.
        """
        self.id = kwargs.pop("id")
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)

        # Both spellings are popped unconditionally. With a short-circuiting
        # `pop(camel) or pop(snake)`, the snake_case key would survive in `kwargs` whenever the
        # camelCase one is set, and the final `__dict__.update` would overwrite the parsed
        # attribute with the raw value.
        created_at_camel = kwargs.pop("createdAt", None)
        created_at_snake = kwargs.pop("created_at", None)
        created_at = created_at_camel or created_at_snake
        self.created_at = parse_datetime(created_at) if created_at else None

        last_modified_camel = kwargs.pop("lastModified", None)
        last_modified_snake = kwargs.pop("last_modified", None)
        last_modified = last_modified_camel or last_modified_snake
        self.last_modified = parse_datetime(last_modified) if last_modified else None

        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.host = kwargs.pop("host", None)
        self.subdomain = kwargs.pop("subdomain", None)
        self.likes = kwargs.pop("likes", None)
        self.sdk = kwargs.pop("sdk", None)
        self.tags = kwargs.pop("tags", None)
        self.trending_score = kwargs.pop("trendingScore", None)
        self.used_storage = kwargs.pop("usedStorage", None)

        card_data_camel = kwargs.pop("cardData", None)
        card_data_snake = kwargs.pop("card_data", None)
        card_data = card_data_camel or card_data_snake
        # A dict is wrapped into a `SpaceCardData`; anything else is stored untouched.
        self.card_data = (
            SpaceCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )

        siblings = kwargs.pop("siblings", None)
        # `None` (key absent) is kept distinct from an empty list (no files).
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )

        runtime = kwargs.pop("runtime", None)
        self.runtime = SpaceRuntime(runtime) if runtime else None
        self.models = kwargs.pop("models", None)
        self.datasets = kwargs.pop("datasets", None)
        self.resource_group = kwargs.pop("resourceGroup", None)

        # backwards compatibility: legacy camelCase attribute names
        self.lastModified = self.last_modified
        self.cardData = self.card_data

        # Expose every remaining (unknown) key as an attribute.
        self.__dict__.update(**kwargs)
@dataclass
class KernelInfo:
    """
    Contains information about a kernel repo on the Hub. This object is returned by [`kernel_info`].

    Attributes:
        id (`str`):
            ID of the kernel repo.
        author (`str`, *optional*):
            Author of the kernel repo.
        downloads (`int`, *optional*):
            Number of downloads of the kernel repo over the last 30 days.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated. If so, whether there is manual or automatic approval.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        likes (`int`, *optional*):
            Number of likes of the kernel repo.
        private (`bool`, *optional*):
            Is the repo private.
        sha (`str`, *optional*):
            Repo SHA at this particular revision.
    """

    id: str
    author: str | None
    downloads: int | None
    gated: Literal["auto", "manual", False] | None
    last_modified: datetime | None
    likes: int | None
    private: bool | None
    sha: str | None

    def __init__(self, **kwargs):
        """
        Build the object from a raw key/value payload.

        Only `id` is required. `lastModified` and `last_modified` are accepted as aliases; the
        camelCase spelling wins when both are given. Unknown keys are attached to the instance.

        Raises:
            `KeyError`: If `id` is missing.
        """
        self.id = kwargs.pop("id")
        self.author = kwargs.pop("author", None)
        self.downloads = kwargs.pop("downloads", None)
        self.gated = kwargs.pop("gated", None)

        # Pop both spellings unconditionally: a short-circuiting `or` would leave
        # `last_modified` in `kwargs`, and the final `__dict__.update` would then replace the
        # parsed datetime with the raw value.
        last_modified_camel = kwargs.pop("lastModified", None)
        last_modified_snake = kwargs.pop("last_modified", None)
        last_modified = last_modified_camel or last_modified_snake
        self.last_modified = parse_datetime(last_modified) if last_modified else None

        self.likes = kwargs.pop("likes", None)
        self.private = kwargs.pop("private", None)
        self.sha = kwargs.pop("sha", None)

        # future compatibility: keep keys this client does not know about yet
        self.__dict__.update(**kwargs)
@dataclass
class CollectionItem:
    """
    Describes one entry of a Collection (model, dataset, Space, paper, collection or bucket).

    Attributes:
        item_object_id (`str`):
            Unique ID of the item in the collection.
        item_id (`str`):
            ID of the underlying object on the Hub. Can be either a repo_id, a paper id, a collection slug
            or a bucket id.
            e.g. `"jbilcke-hf/ai-comic-factory"`, `"2307.09288"`, `"celinah/cerebras-function-calling-682607169c35fbfa98b30b9a"`.
        item_type (`str`):
            Type of the underlying object. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"`, `"collection"`
            or `"bucket"`.
        position (`int`):
            Position of the item in the collection.
        note (`str`, *optional*):
            Note associated with the item, as plain text.
    """

    item_object_id: str  # id in database
    item_id: str  # repo_id or paper id
    item_type: str
    position: int
    note: str | None = None

    def __init__(
        self,
        _id: str,
        id: str,
        type: CollectionItemType_T,
        position: int,
        note: dict | None = None,
        **kwargs,
    ) -> None:
        """Populate the item from its raw fields; extra keyword arguments other than `slug` are ignored."""
        self.item_object_id = _id  # id in database

        # When a `slug` is supplied (item is itself a collection) it takes the place of `id`.
        collection_slug = kwargs.get("slug")
        self.item_id = id if collection_slug is None else collection_slug

        self.item_type = type
        self.position = position

        # Only a string found under `note["text"]` is kept; anything else yields `None`.
        text = None if note is None else note.get("text")
        self.note = text if isinstance(text, str) else None
@dataclass
class Collection:
    """
    Describes a Collection hosted on the Hub.

    Attributes:
        slug (`str`):
            Slug of the collection. E.g. `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        title (`str`):
            Title of the collection. E.g. `"Recent models"`.
        owner (`str`):
            Owner of the collection. E.g. `"TheBloke"`.
        items (`list[CollectionItem]`):
            List of items in the collection.
        last_updated (`datetime`):
            Date of the last update of the collection.
        position (`int`):
            Position of the collection in the list of collections of the owner.
        private (`bool`):
            Whether the collection is private or not.
        theme (`str`):
            Theme of the collection. E.g. `"green"`.
        upvotes (`int`):
            Number of upvotes of the collection.
        description (`str`, *optional*):
            Description of the collection, as plain text.
        url (`str`):
            (property) URL of the collection on the Hub.
    """

    slug: str
    title: str
    owner: str
    items: list[CollectionItem]
    last_updated: datetime
    position: int
    private: bool
    theme: str
    upvotes: int
    description: str | None = None

    def __init__(self, **kwargs) -> None:
        """
        Populate the collection from a raw key/value payload.

        Every key except `description` and `endpoint` is mandatory (a missing one raises
        `KeyError`). Keys that are not listed here are silently dropped.
        """
        self.slug = kwargs.pop("slug")
        self.title = kwargs.pop("title")
        self.owner = kwargs.pop("owner")

        raw_items = kwargs.pop("items")
        self.items = [CollectionItem(**raw_item) for raw_item in raw_items]

        raw_last_updated = kwargs.pop("lastUpdated")
        self.last_updated = parse_datetime(raw_last_updated)

        self.position = kwargs.pop("position")
        self.private = kwargs.pop("private")
        self.theme = kwargs.pop("theme")
        self.upvotes = kwargs.pop("upvotes")
        self.description = kwargs.pop("description", None)

        # The URL is computed once, against the given endpoint or the configured default one.
        base_url = kwargs.pop("endpoint", None)
        if base_url is None:
            base_url = constants.ENDPOINT
        self._url = f"{base_url}/collections/{self.slug}"

    @property
    def url(self) -> str:
        """Returns the URL of the collection on the Hub."""
        return self._url
@dataclass
class GitRefInfo:
    """
    Describes a single git reference (e.g. a branch or a tag) of a repo on the Hub.

    Attributes:
        name (`str`):
            Name of the reference (e.g. tag name or branch name).
        ref (`str`):
            Full git ref on the Hub (e.g. `"refs/heads/main"` or `"refs/tags/v1.0"`).
        target_commit (`str`):
            OID of the target commit for the ref (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`)
    """

    # Short name of the reference
    name: str
    # Fully-qualified git ref
    ref: str
    # Commit OID the reference points to
    target_commit: str
@dataclass
class GitRefs:
    """
    Groups all git references of a repo on the Hub, by category.

    Object is returned by [`list_repo_refs`].

    Attributes:
        branches (`list[GitRefInfo]`):
            A list of [`GitRefInfo`] containing information about branches on the repo.
        converts (`list[GitRefInfo]`):
            A list of [`GitRefInfo`] containing information about "convert" refs on the repo.
            Converts are refs used (internally) to push preprocessed data in Dataset repos.
        tags (`list[GitRefInfo]`):
            A list of [`GitRefInfo`] containing information about tags on the repo.
        pull_requests (`list[GitRefInfo]`, *optional*):
            A list of [`GitRefInfo`] containing information about pull requests on the repo.
            Only returned if `include_prs=True` is set.
    """

    branches: list[GitRefInfo]
    converts: list[GitRefInfo]
    tags: list[GitRefInfo]
    # Stays `None` unless pull request refs are provided
    pull_requests: list[GitRefInfo] | None = None
@dataclass
class GitCommitInfo:
    """
    Describes one git commit of a repo on the Hub. Check out [`list_repo_commits`] for more details.

    Attributes:
        commit_id (`str`):
            OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`)
        authors (`list[str]`):
            List of authors of the commit.
        created_at (`datetime`):
            Datetime when the commit was created.
        title (`str`):
            Title of the commit. This is a free-text value entered by the authors.
        message (`str`):
            Description of the commit. This is a free-text value entered by the authors.
        formatted_title (`str`):
            Title of the commit formatted as HTML. Only returned if `formatted=True` is set.
        formatted_message (`str`):
            Description of the commit formatted as HTML. Only returned if `formatted=True` is set.
    """

    commit_id: str
    authors: list[str]
    created_at: datetime
    # Free-text values entered by the commit authors
    title: str
    message: str
    # HTML renderings; no default value, so they must always be passed (possibly as `None`)
    formatted_title: str | None
    formatted_message: str | None
@dataclass
class UserLikes:
    """
    Lists everything a given user has liked on the Hub.

    Attributes:
        user (`str`):
            Name of the user for which we fetched the likes.
        total (`int`):
            Total number of likes.
        datasets (`list[str]`):
            List of datasets liked by the user (as repo_ids).
        kernels (`list[str]`):
            List of kernels liked by the user (as repo_ids).
        models (`list[str]`):
            List of models liked by the user (as repo_ids).
        spaces (`list[str]`):
            List of spaces liked by the user (as repo_ids).
    """

    # --- Who, and how many ---
    user: str
    total: int

    # --- Liked repos, one list of repo_ids per repo type ---
    datasets: list[str]
    kernels: list[str]
    models: list[str]
    spaces: list[str]
@dataclass
class Organization:
    """
    Describes an organization on the Hub.

    Attributes:
        avatar_url (`str`):
            URL of the organization's avatar.
        name (`str`):
            Name of the organization on the Hub (unique).
        fullname (`str`):
            Organization's full name.
        details (`str`, *optional*):
            Organization's description.
        is_verified (`bool`, *optional*):
            Whether the organization is verified.
        is_following (`bool`, *optional*):
            Whether the authenticated user follows this organization.
        num_users (`int`, *optional*):
            Number of members in the organization.
        num_models (`int`, *optional*):
            Number of models owned by the organization.
        num_spaces (`int`, *optional*):
            Number of Spaces owned by the organization.
        num_datasets (`int`, *optional*):
            Number of datasets owned by the organization.
        num_followers (`int`, *optional*):
            Number of followers of the organization.
        num_papers (`int`, *optional*):
            Number of papers authored by the organization.
        plan (`str`, *optional*):
            The organization's plan (e.g., "enterprise", "team").
    """

    avatar_url: str
    name: str
    fullname: str
    details: str | None = None
    is_verified: bool | None = None
    is_following: bool | None = None
    num_users: int | None = None
    num_models: int | None = None
    num_spaces: int | None = None
    num_datasets: int | None = None
    num_followers: int | None = None
    num_papers: int | None = None
    plan: str | None = None

    def __init__(self, **kwargs) -> None:
        """Populate the organization from a raw payload; every key is optional."""
        # (attribute name, payload key, value used when the key is absent)
        layout = (
            ("avatar_url", "avatarUrl", ""),
            ("name", "name", ""),
            ("fullname", "fullname", ""),
            ("details", "details", None),
            ("is_verified", "isVerified", None),
            ("is_following", "isFollowing", None),
            ("num_users", "numUsers", None),
            ("num_models", "numModels", None),
            ("num_spaces", "numSpaces", None),
            ("num_datasets", "numDatasets", None),
            ("num_followers", "numFollowers", None),
            ("num_papers", "numPapers", None),
            ("plan", "plan", None),
        )
        for attribute, payload_key, fallback in layout:
            setattr(self, attribute, kwargs.pop(payload_key, fallback))

        # forward compatibility: unknown keys become attributes as-is
        self.__dict__.update(**kwargs)
@dataclass
class User:
    """
    Contains information about a user on the Hub.

    Attributes:
        username (`str`):
            Name of the user on the Hub (unique).
        fullname (`str`):
            User's full name.
        avatar_url (`str`):
            URL of the user's avatar.
        details (`str`, *optional*):
            User's details.
        is_following (`bool`, *optional*):
            Whether the authenticated user is following this user.
        is_pro (`bool`, *optional*):
            Whether the user is a pro user.
        num_models (`int`, *optional*):
            Number of models created by the user.
        num_datasets (`int`, *optional*):
            Number of datasets created by the user.
        num_spaces (`int`, *optional*):
            Number of spaces created by the user.
        num_discussions (`int`, *optional*):
            Number of discussions initiated by the user.
        num_papers (`int`, *optional*):
            Number of papers authored by the user.
        num_upvotes (`int`, *optional*):
            Number of upvotes received by the user.
        num_likes (`int`, *optional*):
            Number of likes given by the user.
        num_following (`int`, *optional*):
            Number of users this user is following.
        num_followers (`int`, *optional*):
            Number of users following this user.
        orgs (list of [`Organization`]):
            List of organizations the user is part of.
        user_type (*optional*):
            Value of the `type` key of the payload, or `None` when absent. Set as a plain instance
            attribute (not a dataclass field).
    """

    # Metadata
    username: str
    fullname: str
    avatar_url: str
    details: str | None = None
    is_following: bool | None = None
    is_pro: bool | None = None
    num_models: int | None = None
    num_datasets: int | None = None
    num_spaces: int | None = None
    num_discussions: int | None = None
    num_papers: int | None = None
    num_upvotes: int | None = None
    num_likes: int | None = None
    num_following: int | None = None
    num_followers: int | None = None
    orgs: list[Organization] = field(default_factory=list)

    def __init__(self, **kwargs) -> None:
        """
        Populate the user from a raw key/value payload; every key is optional.

        The username is read from the `user` key. Unknown keys are attached to the instance.
        """
        self.username = kwargs.pop("user", "")
        self.fullname = kwargs.pop("fullname", "")
        self.avatar_url = kwargs.pop("avatarUrl", "")
        self.is_following = kwargs.pop("isFollowing", None)
        self.is_pro = kwargs.pop("isPro", None)
        self.details = kwargs.pop("details", None)
        self.num_models = kwargs.pop("numModels", None)
        self.num_datasets = kwargs.pop("numDatasets", None)
        self.num_spaces = kwargs.pop("numSpaces", None)
        self.num_discussions = kwargs.pop("numDiscussions", None)
        self.num_papers = kwargs.pop("numPapers", None)
        self.num_upvotes = kwargs.pop("numUpvotes", None)
        self.num_likes = kwargs.pop("numLikes", None)
        self.num_following = kwargs.pop("numFollowing", None)
        self.num_followers = kwargs.pop("numFollowers", None)
        self.user_type = kwargs.pop("type", None)

        # An explicit `"orgs": None` is treated like a missing key instead of raising
        # `TypeError` when iterating over it.
        raw_orgs = kwargs.pop("orgs", None) or []
        self.orgs = [Organization(**org) for org in raw_orgs]

        # forward compatibility
        self.__dict__.update(**kwargs)
@dataclass
class PaperAuthor:
    """
    Describes one author of a paper on the Hub.

    Attributes:
        name (`str`):
            Name of the author.
        user (`User`, *optional*):
            Information about the author as a [`User`] object.
        status (`str`, *optional*):
            Status of the author on the Hub.
        status_last_changed_at (`datetime`, *optional*):
            Date when the status of the author changed.
        hidden (`bool`, *optional*):
            Whether the author is hidden on the Hub.
    """

    name: str
    user: User | None
    status: str | None
    status_last_changed_at: datetime | None
    hidden: bool | None

    def __init__(self, **kwargs) -> None:
        """Populate the author from a raw payload; unknown keys are attached to the instance."""
        self.name = kwargs.pop("name", "")

        # A missing or empty `user` payload means no `User` object is built.
        user_payload = kwargs.pop("user", None)
        if user_payload:
            self.user = User(**user_payload)
        else:
            self.user = None

        self.status = kwargs.pop("status", None)

        changed_at = kwargs.pop("statusLastChangedAt", None)
        if changed_at:
            self.status_last_changed_at = parse_datetime(changed_at)
        else:
            self.status_last_changed_at = None

        self.hidden = kwargs.pop("hidden", None)
        self.__dict__.update(**kwargs)
@dataclass
class PaperInfo:
    """
    Contains information about a paper on the Hub.

    Attributes:
        id (`str`):
            arXiv paper ID.
        authors (`list[PaperAuthor]`, *optional*):
            Authors of the paper.
        published_at (`datetime`, *optional*):
            Date paper published.
        title (`str`, *optional*):
            Title of the paper.
        summary (`str`, *optional*):
            Summary of the paper.
        upvotes (`int`, *optional*):
            Number of upvotes for the paper on the Hub.
        discussion_id (`str`, *optional*):
            Discussion ID for the paper on the Hub.
        source (`str`, *optional*):
            Source of the paper.
        comments (`int`, *optional*):
            Number of comments for the paper on the Hub.
        submitted_at (`datetime`, *optional*):
            Date paper appeared in daily papers on the Hub.
        submitted_by (`User`, *optional*):
            Information about who submitted the daily paper.
        ai_summary (`str`, *optional*):
            AI summary of the paper.
        ai_keywords (`list[str]`, *optional*):
            AI keywords of the paper.
        organization (`Organization`, *optional*):
            Information about the organization associated with the paper.
        project_page (`str`, *optional*):
            URL of the project page for the paper.
        github_repo (`str`, *optional*):
            URL of the GitHub repository for the paper.
        github_stars (`int`, *optional*):
            Number of stars of the GitHub repository for the paper.
    """

    id: str
    authors: list[PaperAuthor] | None
    published_at: datetime | None
    title: str | None
    summary: str | None
    upvotes: int | None
    discussion_id: str | None
    source: str | None
    comments: int | None
    submitted_at: datetime | None
    submitted_by: User | None
    ai_summary: str | None
    ai_keywords: list[str] | None
    organization: Organization | None
    project_page: str | None
    github_repo: str | None
    github_stars: int | None

    def __init__(self, **kwargs) -> None:
        """
        Populate the paper from a raw key/value payload.

        Values may sit at the top level of the payload or inside a nested `paper` dict. For
        `authors`, `publishedAt`, `summary`, `upvotes` and `discussionId` the nested value is
        preferred; for `id` the top-level value is preferred. The nested dict passed by the
        caller is never modified. Unknown top-level keys are attached to the instance.
        """
        # Work on a shallow copy: the nested dict belongs to the caller and must not lose keys.
        paper = dict(kwargs.pop("paper", None) or {})

        self.id = kwargs.pop("id", None) or paper.pop("id", None)

        # For keys whose name equals the attribute name (`authors`, `summary`, `upvotes`) both
        # locations are popped unconditionally; otherwise a left-over top-level key would
        # overwrite the parsed attribute in the final `__dict__.update`.
        nested_authors = paper.pop("authors", None)
        top_level_authors = kwargs.pop("authors", None)
        authors = nested_authors or top_level_authors
        self.authors = [PaperAuthor(**author) for author in authors] if authors else None

        # Deliberately short-circuited: when the nested dict provides `publishedAt`, the
        # top-level `publishedAt` must stay in `kwargs` so it can be used for `submitted_at` below.
        published_at = paper.pop("publishedAt", None) or kwargs.pop("publishedAt", None)
        self.published_at = parse_datetime(published_at) if published_at else None

        self.title = kwargs.pop("title", None)
        self.source = kwargs.pop("source", None)

        nested_summary = paper.pop("summary", None)
        top_level_summary = kwargs.pop("summary", None)
        self.summary = nested_summary or top_level_summary

        nested_upvotes = paper.pop("upvotes", None)
        top_level_upvotes = kwargs.pop("upvotes", None)
        upvotes = nested_upvotes or top_level_upvotes
        if upvotes is None:
            # `0 or None` would turn a genuine zero count into "unknown"; restore it.
            upvotes = nested_upvotes
        self.upvotes = upvotes

        self.discussion_id = paper.pop("discussionId", None) or kwargs.pop("discussionId", None)
        self.comments = kwargs.pop("numComments", 0)

        submitted_at = kwargs.pop("publishedAt", None) or kwargs.pop("submittedOnDailyAt", None)
        self.submitted_at = parse_datetime(submitted_at) if submitted_at else None

        submitted_by = kwargs.pop("submittedBy", None) or kwargs.pop("submittedOnDailyBy", None)
        self.submitted_by = User(**submitted_by) if submitted_by else None

        self.ai_summary = kwargs.pop("ai_summary", None)
        self.ai_keywords = kwargs.pop("ai_keywords", None)

        organization = kwargs.pop("organization", None)
        self.organization = Organization(**organization) if organization else None

        self.project_page = kwargs.pop("projectPage", None)
        self.github_repo = kwargs.pop("githubRepo", None)
        self.github_stars = kwargs.pop("githubStars", None)

        # forward compatibility
        self.__dict__.update(**kwargs)
@dataclass
class LFSFileInfo:
    """
    Describes a file stored as LFS on a repo on the Hub.

    Used in the context of listing and permanently deleting LFS files from a repo to free-up space.
    See [`list_lfs_files`] and [`permanently_delete_lfs_files`] for more details.

    Git LFS files are tracked using SHA-256 object IDs, rather than file paths, to optimize performance.
    This approach is necessary because a single object can be referenced by multiple paths across different commits,
    making it impractical to search and resolve these connections. Check out [our documentation](https://huggingface.co/docs/hub/storage-limits#advanced-track-lfs-file-references)
    to learn how to know which filename(s) is(are) associated with each SHA.

    Attributes:
        file_oid (`str`):
            SHA-256 object ID of the file. This is the identifier to pass when permanently deleting the file.
        filename (`str`):
            Possible filename for the LFS object. See the note above for more information.
        oid (`str`):
            OID of the LFS object.
        pushed_at (`datetime`):
            Date the LFS object was pushed to the repo.
        ref (`str`, *optional*):
            Ref where the LFS object has been pushed (if any).
        size (`int`):
            Size of the LFS object.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

        # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
        # e.g. select only LFS files in the "checkpoints" folder
        >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

        # Permanently delete LFS files
        >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
        ```
    """

    file_oid: str
    filename: str
    oid: str
    pushed_at: datetime
    ref: str | None
    size: int

    def __init__(self, **kwargs) -> None:
        """
        Populate the entry from a raw payload.

        `fileOid`, `filename`, `oid`, `pushedAt` and `size` are mandatory (`KeyError` otherwise);
        `ref` is optional. Unknown keys are attached to the instance.
        """
        # Extract first (same order as the payload is documented), then assign.
        file_oid = kwargs.pop("fileOid")
        filename = kwargs.pop("filename")
        oid = kwargs.pop("oid")
        pushed_at = parse_datetime(kwargs.pop("pushedAt"))
        ref = kwargs.pop("ref", None)
        size = kwargs.pop("size")

        self.file_oid = file_oid
        self.filename = filename
        self.oid = oid
        self.pushed_at = pushed_at
        self.ref = ref
        self.size = size

        # forward compatibility
        self.__dict__.update(**kwargs)
@dataclass
class DatasetLeaderboardEntry:
    """Describes a single entry in a dataset leaderboard on the Hub.

    A leaderboard ranks models based on their evaluation scores on a given benchmark dataset.
    This object is returned by [`get_dataset_leaderboard`]. To get evaluation results for a
    specific model across benchmarks, see [`ModelInfo.eval_results`] (via [`model_info`] with
    `expand=["evalResults"]`) and [`EvalResultEntry`].

    Attributes:
        rank (`int`):
            Rank of the model on the leaderboard (1-indexed).
        model_id (`str`):
            ID of the model (e.g. `"meta-llama/Llama-3-8b"`).
        value (`float`):
            Evaluation score value.
        filename (`str`):
            Name of the result file containing the evaluation data.
        verified (`bool`):
            Whether the result has been verified.
        source (`dict[str, Any]`):
            Information about the source of the evaluation result. Contains keys like
            `"url"`, `"name"`, and `"isExternal"`.
        author (`User` or `Organization`):
            The model author, parsed based on the `"type"` field in the API response.
        pull_request (`int`, *optional*):
            Pull request number associated with the leaderboard entry, if any.
        notes (`str`, *optional*):
            Notes associated with the leaderboard entry, if any.
    """

    rank: int
    model_id: str
    value: float
    filename: str
    verified: bool
    source: dict[str, Any]
    author: User | Organization
    pull_request: int | None = None
    notes: str | None = None

    def __init__(self, **kwargs) -> None:
        """
        Populate the entry from a raw payload.

        `rank`, `modelId`, `value`, `filename`, `verified`, `source` and `author` are mandatory
        (`KeyError` otherwise). Unknown keys are attached to the instance.
        """
        # (attribute name, mandatory payload key), consumed in this order
        mandatory = (
            ("rank", "rank"),
            ("model_id", "modelId"),
            ("value", "value"),
            ("filename", "filename"),
            ("verified", "verified"),
            ("source", "source"),
        )
        for attribute, payload_key in mandatory:
            setattr(self, attribute, kwargs.pop(payload_key))

        # Copy the author payload so that the rename below does not touch the caller's dict.
        author_payload = dict(kwargs.pop("author"))
        if author_payload.get("type") != "org":
            # `User` reads the username from the `user` key, hence the rename of `name`.
            author_payload["user"] = author_payload.pop("name", "")
            self.author = User(**author_payload)
        else:
            self.author = Organization(**author_payload)

        self.pull_request = kwargs.pop("pullRequest", None)
        self.notes = kwargs.pop("notes", None)

        # forward compatibility
        self.__dict__.update(**kwargs)
def future_compatible(fn: CallableT) -> CallableT:
    """Decorate a method of `HfApi` so that it honours `run_as_future=True`.

    When the decorated method is called with `run_as_future=True` (as a keyword or positionally),
    the call is delegated to `self.run_as_future(...)` and its return value is returned.
    Otherwise the method is simply executed and its result returned.
    """
    # Names of the parameters of `fn`, without the leading "self"; used to locate a
    # positionally-passed `run_as_future`.
    positional_names = list(inspect.signature(fn).parameters)[1:]

    @wraps(fn)
    def _inner(self, *args, **kwargs):
        if "run_as_future" in kwargs:
            as_future = kwargs["run_as_future"]
            kwargs["run_as_future"] = False  # avoid recursion error
        else:
            # Not given as keyword: look it up among the positional arguments (default: False).
            bound_positionals = dict(zip(positional_names, args))
            as_future = bound_positionals.get("run_as_future", False)

        if not as_future:
            # Regular call.
            return fn(self, *args, **kwargs)

        # Delegate the call to `self.run_as_future`.
        return self.run_as_future(fn, self, *args, **kwargs)

    _inner.is_future_compatible = True  # type: ignore
    return _inner  # type: ignore
- def _get_safetensors_metadata_size(size_bytes: bytes, filename: str, context_msg: str) -> int:
- """
- Parse and validate safetensors metadata size from the first 8 bytes.
- This is a shared helper function used by both remote and local safetensors parsing.
- Args:
- size_bytes: First 8 bytes of the safetensors file.
- filename: Filename for error messages.
- context_msg: Additional context for error messages.
- Returns:
- The metadata size as an integer.
- Raises:
- SafetensorsParsingError: If size_bytes is too short or metadata size exceeds limit.
- """
- if len(size_bytes) < 8:
- raise SafetensorsParsingError(
- f"Failed to parse safetensors header for '{filename}' ({context_msg}): file is too small to be a valid "
- "safetensors file."
- )
- metadata_size = struct.unpack("<Q", size_bytes[:8])[0]
- if metadata_size > constants.SAFETENSORS_MAX_HEADER_LENGTH:
- raise SafetensorsParsingError(
- f"Failed to parse safetensors header for '{filename}' ({context_msg}): safetensors header is too big. "
- f"Maximum supported size is {constants.SAFETENSORS_MAX_HEADER_LENGTH} bytes (got {metadata_size})."
- )
- return metadata_size
def _parse_safetensors_header(metadata_as_bytes: bytes, filename: str, context_msg: str) -> SafetensorsFileMetadata:
    """
    Parse safetensors metadata from raw header bytes.

    This is a shared helper function used by both remote and local safetensors parsing.

    Args:
        metadata_as_bytes: Raw bytes of the JSON metadata header (without the 8-byte size prefix).
        filename: Filename for error messages.
        context_msg: Additional context for error messages (e.g., repo info or local path).

    Returns:
        SafetensorsFileMetadata object. Its `metadata` is the `"__metadata__"` entry of the header (or `{}` when
        absent) and its `tensors` maps every other key to a `TensorInfo`.

    Raises:
        SafetensorsParsingError: If the header is not valid JSON, or if it is valid JSON that does not have the
            expected structure (top-level value is not an object, a tensor entry is not an object, a required
            key is missing, or `data_offsets` is not iterable).
    """
    # Parse json header. Undecodable bytes are dropped (`errors="ignore"`) instead of raising.
    try:
        metadata_as_dict = json.loads(metadata_as_bytes.decode(errors="ignore"))
    except json.JSONDecodeError as e:
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' ({context_msg}): header is not json-encoded string. "
            "Please make sure this is a correctly formatted safetensors file."
        ) from e

    try:
        return SafetensorsFileMetadata(
            metadata=metadata_as_dict.get("__metadata__", {}),
            tensors={
                key: TensorInfo(
                    dtype=tensor["dtype"],
                    shape=tensor["shape"],
                    data_offsets=tuple(tensor["data_offsets"]),  # type: ignore
                )
                for key, tensor in metadata_as_dict.items()
                if key != "__metadata__"
            },
        )
    # AttributeError: the top-level JSON value is not an object (no `.get` / `.items`).
    # TypeError: a tensor entry is not subscriptable by key, or `data_offsets` is not iterable.
    # Both mean "malformed header" and must surface as the documented parsing error.
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' ({context_msg}): header format not recognized. "
            "Please make sure this is a correctly formatted safetensors file."
        ) from e
- class HfApi:
- """
- Client to interact with the Hugging Face Hub via HTTP.
- The client is initialized with some high-level settings used in all requests
- made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi`
- client is preferred but not mandatory as all of its public methods are exposed
- directly at the root of `huggingface_hub`.
- Args:
- endpoint (`str`, *optional*):
- Endpoint of the Hub. Defaults to <https://huggingface.co>.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- library_name (`str`, *optional*):
- The name of the library that is making the HTTP request. Will be added to
- the user-agent header. Example: `"transformers"`.
- library_version (`str`, *optional*):
- The version of the library that is making the HTTP request. Will be added
- to the user-agent header. Example: `"4.24.0"`.
- user_agent (`str`, `dict`, *optional*):
- The user agent info in the form of a dictionary or a single string. It will
- be completed with information about the installed packages.
- headers (`dict`, *optional*):
- Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`.
- Headers passed here are taking precedence over the default headers.
- """
def __init__(
    self,
    endpoint: str | None = None,
    token: str | bool | None = None,
    library_name: str | None = None,
    library_version: str | None = None,
    user_agent: dict | str | None = None,
    headers: dict[str, str] | None = None,
) -> None:
    """Store the client-wide settings on the instance. Only attribute assignments happen here."""
    # Fall back to the configured default endpoint only when none is given explicitly
    if endpoint is None:
        endpoint = constants.ENDPOINT
    self.endpoint = endpoint

    # Settings kept as-is
    self.token = token
    self.library_name = library_name
    self.library_version = library_version
    self.user_agent = user_agent
    self.headers = headers

    # Executor used by `run_as_future`; starts unset
    self._thread_pool: ThreadPoolExecutor | None = None

    # /whoami-v2 is the only endpoint for which we may want to cache results
    self._whoami_cache: dict[str, dict] = {}
def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]:
    """
    Run a method in the background and return a Future instance.

    The main goal is to run methods without blocking the main thread (e.g. to push data during a training).
    Background jobs are queued to preserve order but are not run in parallel. If you need to speed-up your scripts
    by parallelizing lots of call to the API, you must setup and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor).

    Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool`
    argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them
    but less verbose.

    Args:
        fn (`Callable`):
            The method to run in the background.
        *args, **kwargs:
            Arguments with which the method will be called.

    Return:
        `Future`: a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) instance to
        get the result of the task.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> future = api.run_as_future(api.whoami) # instant
        >>> future.done()
        False
        >>> future.result() # wait until complete and return result
        (...)
        >>> future.done()
        True
        ```
    """
    # Create the executor lazily, on first use. `max_workers=1` means submitted jobs run one at a time.
    if self._thread_pool is None:
        self._thread_pool = ThreadPoolExecutor(max_workers=1)
    return self._thread_pool.submit(fn, *args, **kwargs)
@validate_hf_hub_args
def whoami(self, token: bool | str | None = None, *, cache: bool = False) -> dict:
    """
    Call HF API to know "whoami".

    With `cache=True`, the result is stored on this client, keyed by token, and reused by later `cache=True`
    calls. This is useful if you plan to call `whoami` multiple times as this endpoint is heavily rate-limited
    for security reasons.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Passing `False` is not supported by this method and raises a `ValueError`.
        cache (`bool`, *optional*):
            Whether to cache the result of the `whoami` call for subsequent calls.
            If an error occurs during the first call, it won't be cached.
            Defaults to `False`.
    """
    # Resolve the token to use: explicit argument first, then the client-level one
    effective_token = token if token is not None else self.token
    if effective_token is False:
        raise ValueError("Cannot use `token=False` with `whoami` method as it requires authentication.")
    if effective_token is None or effective_token is True:
        # No explicit string was given: look it up with `get_token`
        effective_token = get_token()
    if effective_token is None:
        raise LocalTokenNotFoundError(
            "Token is required to call the /whoami-v2 endpoint, but no token found. You must provide a token or be logged in to "
            "Hugging Face with `hf auth login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens."
        )

    # Without caching, always hit the Hub
    if not cache:
        return self._inner_whoami(token=effective_token)

    # With caching, reuse a previously stored (non-empty) answer when available
    cached_output = self._whoami_cache.get(effective_token)
    if cached_output:
        return cached_output

    fresh_output = self._inner_whoami(token=effective_token)
    self._whoami_cache[effective_token] = fresh_output
    return fresh_output
def _inner_whoami(self, token: str) -> dict:
    """
    Send the GET request to `/api/whoami-v2` and return the decoded JSON body.

    HTTP 401 and 429 failures are re-raised as `HfHubHTTPError` with a more helpful message; any other
    `HfHubHTTPError` is propagated unchanged.
    """
    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/whoami-v2",
        headers=self._build_hf_headers(token=token),
    )
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as err:
        status_code = err.response.status_code

        if status_code == 401:
            # Work out which source the rejected token matches so that the hint is actionable.
            # Sources are compared in this order: Colab vault, environment variable, token file.
            if token == _get_token_from_google_colab():
                hint = " The token from Google Colab vault is invalid. Please update it from the UI."
            elif token == _get_token_from_environment():
                hint = (
                    " The token from HF_TOKEN environment variable is invalid. "
                    "Note that HF_TOKEN takes precedence over `hf auth login`."
                )
            elif token == _get_token_from_file():
                hint = " The token stored is invalid. Please run `hf auth login --force` to set a new token."
            else:
                hint = ""
            raise HfHubHTTPError("Invalid user token." + hint, response=err.response) from err

        if status_code == 429:
            raise HfHubHTTPError(
                "You've hit the rate limit for the /whoami-v2 endpoint, which is intentionally strict for security reasons."
                " If you're calling it often, consider caching the response with `whoami(..., cache=True)`.",
                response=err.response,
            ) from err

        # Any other status: propagate the original error untouched
        raise
    return response.json()
def get_model_tags(self) -> dict:
    """
    List all valid model tags, grouped by type.

    Returns the JSON payload of the `/api/models-tags-by-type` endpoint.
    """
    url = f"{self.endpoint}/api/models-tags-by-type"
    response = get_session().get(url)
    hf_raise_for_status(response)
    return response.json()
def get_dataset_tags(self) -> dict:
    """
    List all valid dataset tags, grouped by type.

    Returns the JSON payload of the `/api/datasets-tags-by-type` endpoint.
    """
    url = f"{self.endpoint}/api/datasets-tags-by-type"
    response = get_session().get(url)
    hf_raise_for_status(response)
    return response.json()
@_deprecate_arguments(version="2.0", deprecated_args=["model_name"], custom_message="Use `search` instead.")
@validate_hf_hub_args
def list_models(
    self,
    *,
    # Search-query parameter
    filter: str | Iterable[str] | None = None,
    author: str | None = None,
    apps: str | list[str] | None = None,
    gated: bool | None = None,
    inference: Literal["warm"] | None = None,
    inference_provider: Literal["all"] | PROVIDER_T | list[PROVIDER_T] | None = None,
    model_name: str | None = None,
    trained_dataset: str | list[str] | None = None,
    search: str | None = None,
    pipeline_tag: str | None = None,
    num_parameters: str | None = None,
    emissions_thresholds: tuple[float, float] | None = None,
    # Sorting and pagination parameters
    sort: ModelSort_T | None = None,
    limit: int | None = None,
    # Additional data to fetch
    expand: list[ExpandModelProperty_T] | None = None,
    full: bool | None = None,
    cardData: bool = False,
    fetch_config: bool = False,
    token: bool | str | None = None,
) -> Iterable[ModelInfo]:
    """
    List models hosted on the Huggingface Hub, given some filters.

    This is a generator: argument validation and HTTP requests only happen once iteration starts.

    Args:
        filter (`str` or `Iterable[str]`, *optional*):
            A string or list of string to filter models on the Hub.
            Models can be filtered by library, language, task, tags, and more.
        author (`str`, *optional*):
            A string which identify the author (user or organization) of the
            returned models.
        apps (`str` or `List`, *optional*):
            A string or list of strings to filter models on the Hub that
            support the specified apps. Example values include `"ollama"` or `["ollama", "vllm"]`.
        gated (`bool`, *optional*):
            A boolean to filter models on the Hub that are gated or not. By default, all models are returned.
            If `gated=True` is passed, only gated models are returned.
            If `gated=False` is passed, only non-gated models are returned.
        inference (`Literal["warm"]`, *optional*):
            If "warm", filter models on the Hub currently served by at least one provider.
        inference_provider (`Literal["all"]` or `str`, *optional*):
            A string to filter models on the Hub that are served by a specific provider.
            Pass `"all"` to get all models served by at least one provider.
        model_name (`str`, *optional*):
            (deprecated). Use `search` instead. When given, it is sent as an additional search term.
        trained_dataset (`str` or `List`, *optional*):
            A string tag or a list of string tags of the trained dataset for a
            model on the Hub. The `dataset:` prefix is added when missing.
        search (`str`, *optional*):
            A string that will be contained in the returned model ids.
        pipeline_tag (`str`, *optional*):
            A string pipeline tag to filter models on the Hub by, such as `summarization`.
        num_parameters (`str`, *optional*):
            Filter models by parameter count. Accepts the same range syntax as the Hub UI and API, for example
            `"min:6B,max:128B"`, `"min:6B"` or `"max:128B"`.
        emissions_thresholds (`Tuple`, *optional*):
            A tuple of two ints or floats representing a minimum and maximum
            carbon footprint to filter the resulting models with in grams.
            Requires `cardData=True`.
        sort (`ModelSort_T`, *optional*):
            The key with which to sort the resulting models. Possible values are "created_at", "downloads",
            "last_modified", "likes" and "trending_score".
        limit (`int`, *optional*):
            The limit on the number of models fetched. Leaving this option
            to `None` fetches all models.
        expand (`list[ExpandModelProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed.
            Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"evalResults"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, and `"resourceGroup"`.
        full (`bool`, *optional*):
            Whether to fetch all model data, including the `last_modified`,
            the `sha`, the files and the `tags`. This is set to `True` by
            default when using a filter.
        cardData (`bool`, *optional*):
            Whether to grab the metadata for the model as well. Can contain
            useful information such as carbon emissions, metrics, and
            datasets trained on.
        fetch_config (`bool`, *optional*):
            Whether to fetch the model configs as well. This is not included
            in `full` due to its size.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `expand` is combined with `full`, `cardData` or `fetch_config`, or if `emissions_thresholds`
            is passed without `cardData=True`.

    Example:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all models
    >>> api.list_models()

    # List text classification models
    >>> api.list_models(filter="text-classification")

    # List models from the KerasHub library
    >>> api.list_models(filter="keras-hub")

    # List models served by Cohere
    >>> api.list_models(inference_provider="cohere")

    # List models with "bert" in their name
    >>> api.list_models(search="bert")

    # List models with "bert" in their name and pushed by google
    >>> api.list_models(search="bert", author="google")

    # List models with 6B to 128B parameters
    >>> api.list_models(num_parameters="min:6B,max:128B", sort="likes")
    ```
    """
    # Reject incompatible argument combinations
    if expand and (full or cardData or fetch_config):
        raise ValueError("`expand` cannot be used if `full`, `cardData` or `fetch_config` are passed.")
    if emissions_thresholds is not None and not cardData:
        raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.")

    url = f"{self.endpoint}/api/models"
    request_headers = self._build_hf_headers(token=token)
    query: dict[str, Any] = {}

    # Tag filters: user-provided filters first, then trained datasets as `dataset:<name>` tags
    tag_filters: list[str] = []
    if filter:
        if isinstance(filter, str):
            tag_filters.append(filter)
        else:
            tag_filters.extend(filter)
    if trained_dataset:
        dataset_names = [trained_dataset] if isinstance(trained_dataset, str) else trained_dataset
        for dataset_name in dataset_names:
            tag_filters.append(dataset_name if dataset_name.startswith("dataset:") else f"dataset:{dataset_name}")
    if tag_filters:
        query["filter"] = tag_filters

    # Other search-query parameters
    if author:
        query["author"] = author
    if apps:
        query["apps"] = [apps] if isinstance(apps, str) else apps
    # For these three, `None` means "not specified" (so `gated=False` is still forwarded)
    for param_name, param_value in (
        ("gated", gated),
        ("inference", inference),
        ("inference_provider", inference_provider),
    ):
        if param_value is not None:
            query[param_name] = param_value
    if pipeline_tag:
        query["pipeline_tag"] = pipeline_tag
    if num_parameters is not None:
        query["num_parameters"] = num_parameters

    # Deprecated `model_name` is sent as a search term, ahead of `search`
    search_terms = [term for term in (model_name, search) if term]
    if search_terms:
        query["search"] = search_terms

    # Sorting and pagination: translate snake_case sort keys to the camelCase names sent to the API
    if sort is not None:
        if sort == "last_modified":
            query["sort"] = "lastModified"
        elif sort == "trending_score":
            query["sort"] = "trendingScore"
        elif sort == "created_at":
            query["sort"] = "createdAt"
        else:
            query["sort"] = sort
    if limit is not None:
        query["limit"] = limit

    # Request additional data
    if full:
        query["full"] = True
    if fetch_config:
        query["config"] = True
    if cardData:
        query["cardData"] = True
    if expand:
        query["expand"] = expand

    # `paginate` returns a generator over the raw items
    raw_items = paginate(url, params=query, headers=request_headers)
    if limit is not None:
        raw_items = islice(raw_items, limit)  # Do not iterate over all pages

    for raw_item in raw_items:
        raw_item.setdefault("siblings", None)
        info = ModelInfo(**raw_item)
        if emissions_thresholds is not None and not _is_emission_within_threshold(info, *emissions_thresholds):
            continue
        yield info
@validate_hf_hub_args
def list_datasets(
    self,
    *,
    # Search-query parameter
    filter: str | Iterable[str] | None = None,
    author: str | None = None,
    benchmark: Literal[True] | Literal["official"] | str | None = None,
    dataset_name: str | None = None,
    gated: bool | None = None,
    language_creators: str | list[str] | None = None,
    language: str | list[str] | None = None,
    multilinguality: str | list[str] | None = None,
    size_categories: str | list[str] | None = None,
    task_categories: str | list[str] | None = None,
    task_ids: str | list[str] | None = None,
    search: str | None = None,
    # Sorting and pagination parameters
    sort: DatasetSort_T | None = None,
    limit: int | None = None,
    # Additional data to fetch
    expand: list[ExpandDatasetProperty_T] | None = None,
    full: bool | None = None,
    token: bool | str | None = None,
) -> Iterable[DatasetInfo]:
    """
    List datasets hosted on the Huggingface Hub, given some filters.

    This is a generator: argument validation and HTTP requests only happen once iteration starts.

    Args:
        filter (`str` or `Iterable[str]`, *optional*):
            A string or list of string to filter datasets on the hub. Tags are passed here as well.
        author (`str`, *optional*):
            A string which identify the author of the returned datasets.
        benchmark (`True`, `"official"`, `str`, *optional*):
            Filter datasets by benchmark. Can be `True` or `"official"` to return official benchmark datasets.
            For future-compatibility, can also be a string representing the benchmark name (currently only "official" is supported).
        dataset_name (`str`, *optional*):
            A string that can be used to identify datasets on the Hub by its name, such as `SQAC` or
            `wikineural`. It is sent as a search term, ahead of `search`.
        gated (`bool`, *optional*):
            A boolean to filter datasets on the Hub that are gated or not. By default, all datasets are returned.
            If `gated=True` is passed, only gated datasets are returned.
            If `gated=False` is passed, only non-gated datasets are returned.
        language_creators (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub with how the data was curated, such as `crowdsourced` or
            `machine_generated`.
        language (`str` or `List`, *optional*):
            A string or list of strings representing a two-character language to
            filter datasets by on the Hub.
        multilinguality (`str` or `List`, *optional*):
            A string or list of strings representing a filter for datasets that
            contain multiple languages.
        size_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the size of the dataset such as `100K<n<1M` or
            `1M<n<10M`.
        task_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the designed task, such as `audio_classification` or
            `named_entity_recognition`.
        task_ids (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the specific task such as `speech_emotion_recognition` or
            `paraphrase`.
        search (`str`, *optional*):
            A string that will be contained in the returned datasets.
        sort (`DatasetSort_T`, *optional*):
            The key with which to sort the resulting datasets. Possible values are "created_at", "downloads",
            "last_modified", "likes" and "trending_score".
        limit (`int`, *optional*):
            The limit on the number of datasets fetched. Leaving this option
            to `None` fetches all datasets.
        expand (`list[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full` is passed.
            Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`.
        full (`bool`, *optional*):
            Whether to fetch all dataset data, including the `last_modified`,
            the `card_data` and the files. Can contain useful information such as the
            PapersWithCode ID.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[DatasetInfo]`: an iterable of [`huggingface_hub.hf_api.DatasetInfo`] objects.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If both `expand` and `full` are passed.

    Example usage with the `filter` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets
    >>> api.list_datasets()

    # List only the text classification datasets
    >>> api.list_datasets(filter="task_categories:text-classification")

    # List only the datasets in russian for language modeling
    >>> api.list_datasets(
    ...     filter=("language:ru", "task_ids:language-modeling")
    ... )

    # List FiftyOne datasets (identified by the tag "fiftyone" in dataset card)
    >>> api.list_datasets(filter="fiftyone")
    ```

    Example usage with the `search` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets with "text" in their name
    >>> api.list_datasets(search="text")

    # List all datasets with "text" in their name made by google
    >>> api.list_datasets(search="text", author="google")
    ```
    """
    if expand and full:
        raise ValueError("`expand` cannot be used if `full` is passed.")

    url = f"{self.endpoint}/api/datasets"
    request_headers = self._build_hf_headers(token=token)
    query: dict[str, Any] = {}

    # Build the `filter` list: raw filters first, then the category arguments as `<category>:<value>` tags
    tag_filters = []
    if filter is not None:
        if isinstance(filter, str):
            tag_filters.append(filter)
        else:
            tag_filters.extend(filter)

    categorized_filters = {
        "language_creators": language_creators,
        "language": language,
        "multilinguality": multilinguality,
        "size_categories": size_categories,
        "task_categories": task_categories,
        "task_ids": task_ids,
    }
    for category, raw_values in categorized_filters.items():
        if not raw_values:
            continue
        values = [raw_values] if isinstance(raw_values, str) else raw_values
        tag_prefix = f"{category}:"
        for entry in values:
            # Values that already carry their category prefix are kept untouched
            tag_filters.append(entry if entry.startswith(tag_prefix) else f"{tag_prefix}{entry}")

    if benchmark is not None:
        # `True` is an alias for the official benchmark
        benchmark_name = "official" if benchmark is True else benchmark
        tag_filters.append(f"benchmark:{benchmark_name}")

    if tag_filters:
        query["filter"] = tag_filters

    # Handle other query params
    if author:
        query["author"] = author
    if gated is not None:
        query["gated"] = gated

    search_terms = [term for term in (dataset_name, search) if term]
    if search_terms:
        query["search"] = search_terms

    # Translate snake_case sort keys to the camelCase names sent to the API
    if sort is not None:
        if sort == "last_modified":
            query["sort"] = "lastModified"
        elif sort == "trending_score":
            query["sort"] = "trendingScore"
        elif sort == "created_at":
            query["sort"] = "createdAt"
        else:
            query["sort"] = sort
    if limit is not None:
        query["limit"] = limit

    # Request additional data
    if expand:
        query["expand"] = expand
    if full:
        query["full"] = True

    raw_items = paginate(url, params=query, headers=request_headers)
    if limit is not None:
        raw_items = islice(raw_items, limit)  # Do not iterate over all pages

    for raw_item in raw_items:
        raw_item.setdefault("siblings", None)
        yield DatasetInfo(**raw_item)
@validate_hf_hub_args
def list_dataset_parquet_files(
    self,
    repo_id: str,
    *,
    config: str | None = None,
    token: bool | str | None = None,
) -> list[DatasetParquetEntry]:
    """List parquet files available for a dataset on the Hub.

    All datasets hosted on the Hub are auto-converted to Parquet by the
    [Dataset Viewer](https://huggingface.co/docs/dataset-viewer/parquet).
    This method returns the list of parquet files with their URLs, configs,
    splits and sizes.

    Args:
        repo_id (`str`):
            The dataset repository ID (e.g. `"username/dataset-name"`).
        config (`str`, *optional*):
            Filter by a specific config/subset name. When provided, only
            parquet files for that config are returned.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `list[DatasetParquetEntry]`: a list of [`DatasetParquetEntry`] objects
        containing config, split, url and size for each parquet file.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the client endpoint is not the default Hugging Face endpoint.

    Example:

    ```python
    >>> from huggingface_hub import list_dataset_parquet_files
    >>> entries = list_dataset_parquet_files("lhoestq/demo1")
    >>> entries[0]
    DatasetParquetEntry(config='default', split='train', url='https://huggingface.co/...', size=5038)
    ```
    """
    default_endpoint = constants._HF_DEFAULT_ENDPOINT
    if self.endpoint != default_endpoint:
        raise ValueError(
            "The Dataset Viewer is only available on the Hugging Face Hub"
            f" (endpoint='{default_endpoint}'). It is not supported on"
            f" third-party endpoints. (endpoint={self.endpoint})"
        )

    url = f"{constants.DATASETS_SERVER_ENDPOINT}/parquet"
    query: dict[str, str] = {"dataset": repo_id}
    if config is not None:
        query["config"] = config

    response = get_session().get(url, params=query, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    # A payload without a "parquet_files" key yields an empty list
    entries = []
    for raw_entry in response.json().get("parquet_files", []):
        entries.append(
            DatasetParquetEntry(
                config=raw_entry["config"],
                split=raw_entry["split"],
                url=raw_entry["url"],
                size=raw_entry["size"],
            )
        )
    return entries
@validate_hf_hub_args
def list_spaces(
    self,
    *,
    # Search-query parameter
    filter: str | Iterable[str] | None = None,
    author: str | None = None,
    search: str | None = None,
    datasets: str | Iterable[str] | None = None,
    models: str | Iterable[str] | None = None,
    linked: bool = False,
    # Sorting and pagination parameters
    sort: SpaceSort_T | None = None,
    limit: int | None = None,
    # Additional data to fetch
    expand: list[ExpandSpaceProperty_T] | None = None,
    full: bool | None = None,
    token: bool | str | None = None,
) -> Iterable[SpaceInfo]:
    """
    List spaces hosted on the Huggingface Hub, given some filters.

    This is a generator: argument validation and HTTP requests only happen once iteration starts.

    Args:
        filter (`str` or `Iterable`, *optional*):
            A string tag or list of tags that can be used to identify Spaces on the Hub.
        author (`str`, *optional*):
            A string which identify the author of the returned Spaces.
        search (`str`, *optional*):
            A string that will be contained in the returned Spaces.
        datasets (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a dataset.
            The name of a specific dataset can be passed as a string.
        models (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a model.
            The name of a specific model can be passed as a string.
        linked (`bool`, *optional*):
            Whether to return Spaces that make use of either a model or a dataset.
        sort (`SpaceSort_T`, *optional*):
            The key with which to sort the resulting spaces. Possible values are "created_at", "last_modified",
            "likes" and "trending_score".
        limit (`int`, *optional*):
            The limit on the number of Spaces fetched. Leaving this option
            to `None` fetches all Spaces.
        expand (`list[ExpandSpaceProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full` is passed.
            Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`.
        full (`bool`, *optional*):
            Whether to fetch all Spaces data, including the `last_modified`, `siblings`
            and `card_data` fields.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[SpaceInfo]`: an iterable of [`huggingface_hub.hf_api.SpaceInfo`] objects.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If both `expand` and `full` are passed.
    """
    if expand and full:
        raise ValueError("`expand` cannot be used if `full` is passed.")

    url = f"{self.endpoint}/api/spaces"
    request_headers = self._build_hf_headers(token=token)
    query: dict[str, Any] = {}

    # Search-query parameters: forwarded as soon as they are not `None`
    for param_name, param_value in (("filter", filter), ("author", author), ("search", search)):
        if param_value is not None:
            query[param_name] = param_value

    # Translate snake_case sort keys to the camelCase names sent to the API
    if sort is not None:
        if sort == "last_modified":
            query["sort"] = "lastModified"
        elif sort == "trending_score":
            query["sort"] = "trendingScore"
        elif sort == "created_at":
            query["sort"] = "createdAt"
        else:
            query["sort"] = sort
    if limit is not None:
        query["limit"] = limit

    if linked:
        query["linked"] = True
    if datasets is not None:
        query["datasets"] = datasets
    if models is not None:
        query["models"] = models

    # Request additional data
    if expand:
        query["expand"] = expand
    if full:
        query["full"] = True

    raw_items = paginate(url, params=query, headers=request_headers)
    if limit is not None:
        raw_items = islice(raw_items, limit)  # Do not iterate over all pages

    for raw_item in raw_items:
        raw_item.setdefault("siblings", None)
        yield SpaceInfo(**raw_item)
@validate_hf_hub_args
def search_spaces(
    self,
    query: str,
    *,
    filter: str | Iterable[str] | None = None,
    sdk: str | list[str] | None = None,
    include_non_running: bool = False,
    token: bool | str | None = None,
) -> Iterable[SpaceSearchResult]:
    """Search Spaces on the Hub using semantic search.

    This endpoint uses semantic search (embedding-based) for multi-word queries
    and full-text search for single-word queries.

    This is a generator: the HTTP request is only sent once iteration starts.

    Args:
        query (`str`):
            The search query string.
        filter (`str` or `Iterable[str]`, *optional*):
            A string tag or list of tags to filter by.
        sdk (`str` or `list[str]`, *optional*):
            Filter by SDK (e.g. `"gradio"`, `"docker"`, `"static"`).
        include_non_running (`bool`, *optional*):
            Whether to include non-running Spaces in results. Defaults to `False`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[SpaceSearchResult]`: an iterable of [`SpaceSearchResult`] objects.

    Example:

    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> results = list(api.search_spaces("generate image"))
    >>> results[0].id
    'mrfakename/Z-Image-Turbo'
    >>> results[0].ai_category
    'Image Generation'
    ```
    """
    url = f"{self.endpoint}/api/spaces/semantic-search"
    request_headers = self._build_hf_headers(token=token)

    # The query text is always sent; the remaining parameters only when set
    query_params: dict[str, Any] = {"q": query}
    if filter is not None:
        query_params["filter"] = filter
    if sdk is not None:
        query_params["sdk"] = sdk
    if include_non_running:
        query_params["includeNonRunning"] = True

    response = get_session().get(url, headers=request_headers, params=query_params)
    hf_raise_for_status(response)

    for raw_result in response.json():
        yield SpaceSearchResult(raw_result)
@validate_hf_hub_args
def unlike(
    self,
    repo_id: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> None:
    """
    Unlike a given repo on the Hub (e.g. remove from favorite list).

    To prevent spam usage, it is not possible to `like` a repository from a script.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository to unlike. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if unliking a dataset or space, `None` or
            `"model"` if unliking a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.

    Example:

    ```python
    >>> from huggingface_hub import list_liked_repos, unlike
    >>> "gpt2" in list_liked_repos().models # we assume you have already liked gpt2
    True
    >>> unlike("gpt2")
    >>> "gpt2" in list_liked_repos().models
    False
    ```
    """
    # `None` is treated as the model repo type
    effective_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type

    # The URL segment is the pluralized repo type (repo type followed by "s")
    like_url = f"{self.endpoint}/api/{effective_repo_type}s/{repo_id}/like"
    response = get_session().delete(url=like_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
@validate_hf_hub_args
def list_liked_repos(
    self,
    user: str | None = None,
    *,
    token: bool | str | None = None,
) -> UserLikes:
    """
    List all public repos liked by a user on huggingface.co.

    This list is public so token is optional. If `user` is not passed, it defaults to
    the logged in user.

    See also [`unlike`].

    Args:
        user (`str`, *optional*):
            Name of the user for which you want to fetch the likes.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`UserLikes`]: object containing the user name, the total number of likes and
        4 lists of repo ids (kernels, models, datasets and Spaces).

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `user` is not passed and the identity resolved from the token is not
            of type `"user"`.

    Example:
    ```python
    >>> from huggingface_hub import list_liked_repos

    >>> likes = list_liked_repos("julien-c")

    >>> likes.user
    "julien-c"

    >>> likes.models
    ["osanseviero/streamlit_1.15", "Xhaheen/ChatGPT_HF", ...]
    ```
    """
    # Without an explicit user, resolve it from the identity behind the token.
    if user is None:
        identity = self.whoami(token=token)
        if identity["type"] != "user":
            raise ValueError(
                "Cannot list liked repos. You must provide a 'user' as input or be logged in as a user."
            )
        user = identity["name"]

    likes_url = f"{self.endpoint}/api/users/{user}/likes"
    auth_headers = self._build_hf_headers(token=token)
    liked = list(paginate(likes_url, params={}, headers=auth_headers))

    # Each item looks like:
    # {
    #     'createdAt': '2021-09-09T21:53:27.000Z',
    #     'repo': {
    #         'name': 'PaddlePaddle/PaddleOCR',
    #         'type': 'space'
    #     }
    # }
    def _names_of(kind: str) -> list[str]:
        # One pass per repo type over the full list: less efficient, simpler to read.
        return [entry["repo"]["name"] for entry in liked if entry["repo"]["type"] == kind]

    return UserLikes(
        user=user,
        total=len(liked),
        kernels=_names_of("kernel"),
        models=_names_of("model"),
        datasets=_names_of("dataset"),
        spaces=_names_of("space"),
    )
@validate_hf_hub_args
def list_repo_likers(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> Iterable[User]:
    """
    List all users who liked a given repo on the Hugging Face Hub.

    Results are produced lazily: pages are requested as the returned iterable is consumed.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository whose likers are listed. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or
            space, `None` or `"model"` if it is a model. Default is
            `None`.

    Returns:
        `Iterable[User]`: an iterable of [`huggingface_hub.hf_api.User`] objects.
    """
    # `None` means "model": fall back to the default repo type before building the URL.
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    likers_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/likers"
    auth_headers = self._build_hf_headers(token=token)

    # Map each raw liker payload to a `User`, one item at a time.
    yield from (
        User(username=entry["user"], fullname=entry["fullname"], avatar_url=entry["avatarUrl"])
        for entry in paginate(likers_url, params={}, headers=auth_headers)
    )
@validate_hf_hub_args
def model_info(
    self,
    repo_id: str,
    *,
    revision: str | None = None,
    timeout: float | None = None,
    securityStatus: bool | None = None,
    files_metadata: bool = False,
    expand: list[ExpandModelProperty_T] | None = None,
    token: bool | str | None = None,
) -> ModelInfo:
    """
    Get info on one specific model on huggingface.co

    Model can be private if you pass an acceptable token or are logged in.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the model repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        securityStatus (`bool`, *optional*):
            Whether to retrieve the security status from the model
            repository as well. The security status will be returned in the `security_repo_status` field.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`list[ExpandModelProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `securityStatus` or `files_metadata` are passed.
            Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"evalResults"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, and `"resourceGroup"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`huggingface_hub.hf_api.ModelInfo`]: The model repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `expand` is combined with `securityStatus` or `files_metadata`.
    """
    # `expand` is mutually exclusive with the other two options.
    if expand and (securityStatus or files_metadata):
        raise ValueError("`expand` cannot be used if `securityStatus` or `files_metadata` are set.")

    auth_headers = self._build_hf_headers(token=token)

    # The revision (when given) is fully percent-encoded so that "/" in ref names stays in one path segment.
    url = f"{self.endpoint}/api/models/{repo_id}"
    if revision is not None:
        url += f"/revision/{quote(revision, safe='')}"

    # Only options that were actually requested are sent as query parameters.
    candidates = (
        ("securityStatus", securityStatus, True),
        ("blobs", files_metadata, True),
        ("expand", expand, expand),
    )
    query: dict = {key: value for key, requested, value in candidates if requested}

    response = get_session().get(url, headers=auth_headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return ModelInfo(**response.json())
@validate_hf_hub_args
def dataset_info(
    self,
    repo_id: str,
    *,
    revision: str | None = None,
    timeout: float | None = None,
    files_metadata: bool = False,
    expand: list[ExpandDatasetProperty_T] | None = None,
    token: bool | str | None = None,
) -> DatasetInfo:
    """
    Get info on one specific dataset on huggingface.co.

    Dataset can be private if you pass an acceptable token.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the dataset repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`list[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `files_metadata` is passed.
            Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`hf_api.DatasetInfo`]: The dataset repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `expand` is combined with `files_metadata`.
    """
    # `expand` and `files_metadata` are mutually exclusive.
    if expand and files_metadata:
        raise ValueError("`expand` cannot be used if `files_metadata` is set.")

    auth_headers = self._build_hf_headers(token=token)

    # Target either the repo itself or one of its revisions (percent-encoded, no safe characters).
    if revision is None:
        url = f"{self.endpoint}/api/datasets/{repo_id}"
    else:
        url = f"{self.endpoint}/api/datasets/{repo_id}/revision/{quote(revision, safe='')}"

    # Only options that were actually requested are sent as query parameters.
    query: dict = {}
    if files_metadata:
        query.update(blobs=True)
    if expand:
        query.update(expand=expand)

    response = get_session().get(url, headers=auth_headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return DatasetInfo(**response.json())
@validate_hf_hub_args
def get_dataset_leaderboard(
    self,
    repo_id: str,
    *,
    token: bool | str | None = None,
    timeout: float | None = None,
) -> list[DatasetLeaderboardEntry]:
    """Get the leaderboard for a dataset on the Hub.

    The leaderboard ranks models based on their evaluation scores on the given benchmark
    dataset. Not all datasets have leaderboards — only benchmark datasets with evaluation
    results submitted to them. This gives a dataset-centric view of scores; for a model-centric
    view, use [`model_info`] with `expand=["evalResults"]`.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`. For example: `"allenai/olmOCR-bench"`.
        token (`bool` or `str`, *optional*):
            A valid user access token. Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.

    Returns:
        `list[DatasetLeaderboardEntry]`: A list of [`DatasetLeaderboardEntry`] objects representing
        the leaderboard entries, sorted by rank.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.HfHubHTTPError`]
    >       If the dataset does not have a leaderboard.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> leaderboard = api.get_dataset_leaderboard("allenai/olmOCR-bench")
    >>> leaderboard[0].model_id
    'datalab-to/chandra-ocr-2'
    >>> leaderboard[0].rank
    1
    ```
    """
    auth_headers = self._build_hf_headers(token=token)
    leaderboard_url = f"{self.endpoint}/api/datasets/{repo_id}/leaderboard"

    response = get_session().get(leaderboard_url, headers=auth_headers, timeout=timeout)
    hf_raise_for_status(response)

    # The payload is a JSON array; each item is unpacked into one entry, in server order.
    entries: list[DatasetLeaderboardEntry] = []
    for raw_entry in response.json():
        entries.append(DatasetLeaderboardEntry(**raw_entry))
    return entries
@validate_hf_hub_args
def space_info(
    self,
    repo_id: str,
    *,
    revision: str | None = None,
    timeout: float | None = None,
    files_metadata: bool = False,
    expand: list[ExpandSpaceProperty_T] | None = None,
    token: bool | str | None = None,
) -> SpaceInfo:
    """
    Get info on one specific Space on huggingface.co.

    Space can be private if you pass an acceptable token.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the space repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`list[ExpandSpaceProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `files_metadata` is passed.
            Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`~hf_api.SpaceInfo`]: The space repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `expand` is combined with `files_metadata`.
    """
    # `expand` and `files_metadata` are mutually exclusive.
    if expand and files_metadata:
        raise ValueError("`expand` cannot be used if `files_metadata` is set.")

    auth_headers = self._build_hf_headers(token=token)

    # Append the percent-encoded revision segment only when a revision was requested.
    url = f"{self.endpoint}/api/spaces/{repo_id}"
    if revision is not None:
        url = f"{url}/revision/{quote(revision, safe='')}"

    # Only options that were actually requested are sent as query parameters.
    query: dict = {}
    if files_metadata:
        query["blobs"] = True
    if expand:
        query["expand"] = expand

    response = get_session().get(url, headers=auth_headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return SpaceInfo(**response.json())
@validate_hf_hub_args
def kernel_info(
    self,
    repo_id: str,
    *,
    revision: str | None = None,
    timeout: float | None = None,
    token: bool | str | None = None,
) -> KernelInfo:
    """
    Get info on one specific kernel on huggingface.co.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        revision (`str`, *optional*):
            The revision of the kernel repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`~hf_api.KernelInfo`]: The kernel repository information.
    """
    auth_headers = self._build_hf_headers(token=token)

    # Append the percent-encoded revision segment only when a revision was requested.
    url = f"{self.endpoint}/api/kernels/{repo_id}"
    if revision is not None:
        url += f"/revision/{quote(revision, safe='')}"

    response = get_session().get(url, headers=auth_headers, timeout=timeout)
    hf_raise_for_status(response)
    return KernelInfo(**response.json())
- @validate_hf_hub_args
- def repo_info(
- self,
- repo_id: str,
- *,
- revision: str | None = None,
- repo_type: str | None = None,
- timeout: float | None = None,
- files_metadata: bool = False,
- expand: ExpandModelProperty_T | ExpandDatasetProperty_T | ExpandSpaceProperty_T | None = None,
- token: bool | str | None = None,
- ) -> ModelInfo | DatasetInfo | SpaceInfo | KernelInfo:
- """
- Get the info object for a given repo of a given type.
- Args:
- repo_id (`str`):
- A namespace (user or an organization) and a repo name separated
- by a `/`.
- revision (`str`, *optional*):
- The revision of the repository from which to get the
- information.
- repo_type (`str`, *optional*):
- Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
- `None` or `"model"` if getting repository info from a model. Default is `None`.
- timeout (`float`, *optional*):
- Whether to set a timeout for the request to the Hub.
- expand (`ExpandModelProperty_T` or `ExpandDatasetProperty_T` or `ExpandSpaceProperty_T`, *optional*):
- List properties to return in the response. When used, only the properties in the list will be returned.
- This parameter cannot be used if `files_metadata` is passed.
- For an exhaustive list of available properties, check out [`model_info`], [`dataset_info`] or [`space_info`].
- files_metadata (`bool`, *optional*):
- Whether or not to retrieve metadata for files in the repository
- (size, LFS metadata, etc). Defaults to `False`.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- Returns:
- `Union[SpaceInfo, DatasetInfo, ModelInfo]`: The repository information, as a
- [`huggingface_hub.hf_api.DatasetInfo`], [`huggingface_hub.hf_api.ModelInfo`]
- or [`huggingface_hub.hf_api.SpaceInfo`] object.
- > [!TIP]
- > Raises the following errors:
- >
- > - [`~utils.RepositoryNotFoundError`]
- > If the repository to download from cannot be found. This may be because it doesn't exist,
- > or because it is set to `private` and you do not have access.
- > - [`~utils.RevisionNotFoundError`]
- > If the revision to download from cannot be found.
- """
- match repo_type:
- case None | "model":
- method = self.model_info
- case "dataset":
- method = self.dataset_info # type: ignore
- case "space":
- method = self.space_info # type: ignore
- case "kernel":
- # No expand/files_metadata for kernels
- return self.kernel_info(repo_id, revision=revision, token=token, timeout=timeout)
- case _:
- raise ValueError("Unsupported repo type.")
- return method(
- repo_id,
- revision=revision,
- token=token,
- timeout=timeout,
- expand=expand, # type: ignore
- files_metadata=files_metadata,
- )
@validate_hf_hub_args
def repo_exists(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> bool:
    """
    Checks if a repository exists on the Hugging Face Hub.

    A gated repository that you cannot access is still reported as existing.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if the repository exists, False otherwise.

    Examples:
        ```py
        >>> from huggingface_hub import repo_exists
        >>> repo_exists("google/gemma-7b")
        True
        >>> repo_exists("google/not-a-repo")
        False
        ```
    """
    try:
        self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
    except GatedRepoError:
        # Access is denied, but the denial itself proves the repo is there.
        # This clause must stay ahead of the generic "not found" one.
        return True
    except RepositoryNotFoundError:
        return False
    else:
        return True
@validate_hf_hub_args
def revision_exists(
    self,
    repo_id: str,
    revision: str,
    *,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> bool:
    """
    Checks if a specific revision exists on a repo on the Hugging Face Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`):
            The revision of the repository to check.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if the repository and the revision exists, False otherwise.

    Examples:
        ```py
        >>> from huggingface_hub import revision_exists
        >>> revision_exists("google/gemma-7b", "float16")
        True
        >>> revision_exists("google/gemma-7b", "not-a-revision")
        False
        ```
    """
    try:
        self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
    except (RevisionNotFoundError, RepositoryNotFoundError):
        # A missing revision and a missing repo both mean "this revision does not exist".
        return False
    else:
        return True
@validate_hf_hub_args
def file_exists(
    self,
    repo_id: str,
    filename: str,
    *,
    repo_type: str | None = None,
    revision: str | None = None,
    token: str | bool | None = None,
) -> bool:
    """
    Checks if a file exists in a repository on the Hugging Face Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        filename (`str`):
            The name of the file to check, for example:
            `"config.json"`
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information. Defaults to `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if the file exists, False otherwise (missing repo, missing revision or missing file).

    Raises:
        [`~utils.GatedRepoError`]:
            Propagated as-is when the repository is gated, rather than being reported as `False`.

    Examples:
        ```py
        >>> from huggingface_hub import file_exists
        >>> file_exists("bigcode/starcoder", "config.json")
        True
        >>> file_exists("bigcode/starcoder", "not-a-file")
        False
        >>> file_exists("bigcode/not-a-repo", "config.json")
        False
        ```
    """
    file_url = hf_hub_url(
        repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint
    )
    try:
        # No explicit token: use the one configured on this client instance.
        effective_token = self.token if token is None else token
        get_hf_file_metadata(file_url, token=effective_token)
    except GatedRepoError:  # raise specifically on gated repo
        raise
    except (RepositoryNotFoundError, RemoteEntryNotFoundError, RevisionNotFoundError):
        return False
    else:
        return True
@validate_hf_hub_args
def list_repo_files(
    self,
    repo_id: str,
    *,
    revision: str | None = None,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> list[str]:
    """
    Get the list of files in a given repo.

    The repo tree is listed recursively and only file entries are kept (folders are dropped).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing files from a dataset or space, `None` or `"model"` if
            listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `list[str]`: the list of files in a given repository.
    """
    tree_entries = self.list_repo_tree(
        repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token
    )

    filenames: list[str] = []
    for entry in tree_entries:
        # The tree mixes files and folders; only files contribute a name.
        if isinstance(entry, RepoFile):
            filenames.append(entry.rfilename)
    return filenames
@validate_hf_hub_args
def list_repo_tree(
    self,
    repo_id: str,
    path_in_repo: str | None = None,
    *,
    recursive: bool = False,
    expand: bool = False,
    revision: str | None = None,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> Iterable[RepoFile | RepoFolder]:
    """
    List a repo tree's files and folders and get information about them.

    This is a generator: nothing is requested until the result is iterated.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        path_in_repo (`str`, *optional*):
            Relative path of the tree (folder) in the repo, for example:
            `"checkpoints/1fec34a/results"`. Will default to the root tree (folder) of the repository.
        recursive (`bool`, *optional*, defaults to `False`):
            Whether to list tree's files and folders recursively.
        expand (`bool`, *optional*, defaults to `False`):
            Whether to fetch more information about the tree's files and folders (e.g. last commit and files' security scan results). This
            operation is more expensive for the server so only 50 results are returned per page (instead of 1000).
            As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it
            takes to get the results.
        revision (`str`, *optional*):
            The revision of the repository from which to get the tree. Defaults to `"main"` branch.
        repo_type (`str`, *optional*):
            The type of the repository from which to get the tree (`"model"`, `"dataset"`, `"space"` or `"kernel"`).
            Defaults to `"model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[Union[RepoFile, RepoFolder]]`:
            The information about the tree's files and folders, as an iterable of [`RepoFile`] and [`RepoFolder`] objects. The order of the files and folders is
            not guaranteed.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
        [`~utils.RemoteEntryNotFoundError`]:
            If the tree (folder) does not exist (error 404) on the repo.

    Examples:

        Get information about a repo's tree (the call returns a generator, so wrap it in `list`).
        ```py
        >>> from huggingface_hub import list_repo_tree
        >>> repo_tree = list_repo_tree("lysandre/arxiv-nlp")
        >>> list(repo_tree)
        [
            RepoFile(path='.gitattributes', size=391, blob_id='ae8c63daedbd4206d7d40126955d4e6ab1c80f8f', lfs=None, last_commit=None, security=None),
            RepoFile(path='README.md', size=391, blob_id='43bd404b159de6fba7c2f4d3264347668d43af25', lfs=None, last_commit=None, security=None),
            RepoFile(path='config.json', size=554, blob_id='2f9618c3a19b9a61add74f70bfb121335aeef666', lfs=None, last_commit=None, security=None),
            RepoFile(
                path='flax_model.msgpack', size=497764107, blob_id='8095a62ccb4d806da7666fcda07467e2d150218e',
                lfs={'size': 497764107, 'sha256': 'd88b0d6a6ff9c3f8151f9d3228f57092aaea997f09af009eefd7373a77b5abb9', 'pointer_size': 134}, last_commit=None, security=None
            ),
            RepoFile(path='merges.txt', size=456318, blob_id='226b0752cac7789c48f0cb3ec53eda48b7be36cc', lfs=None, last_commit=None, security=None),
            RepoFile(
                path='pytorch_model.bin', size=548123560, blob_id='64eaa9c526867e404b68f2c5d66fd78e27026523',
                lfs={'size': 548123560, 'sha256': '9be78edb5b928eba33aa88f431551348f7466ba9f5ef3daf1d552398722a5436', 'pointer_size': 134}, last_commit=None, security=None
            ),
            RepoFile(path='vocab.json', size=898669, blob_id='b00361fece0387ca34b4b8b8539ed830d644dbeb', lfs=None, last_commit=None, security=None)
        ]
        ```

        Get even more information about a repo's tree (last commit and files' security scan results)
        ```py
        >>> from huggingface_hub import list_repo_tree
        >>> repo_tree = list_repo_tree("prompthero/openjourney-v4", expand=True)
        >>> list(repo_tree)
        [
            RepoFolder(
                path='feature_extractor',
                tree_id='aa536c4ea18073388b5b0bc791057a7296a00398',
                last_commit={
                    'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190',
                    'title': 'Upload diffusers weights (#48)',
                    'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc)
                }
            ),
            RepoFolder(
                path='safety_checker',
                tree_id='65aef9d787e5557373fdf714d6c34d4fcdd70440',
                last_commit={
                    'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190',
                    'title': 'Upload diffusers weights (#48)',
                    'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc)
                }
            ),
            RepoFile(
                path='model_index.json',
                size=582,
                blob_id='d3d7c1e8c3e78eeb1640b8e2041ee256e24c9ee1',
                lfs=None,
                last_commit={
                    'oid': 'b195ed2d503f3eb29637050a886d77bd81d35f0e',
                    'title': 'Fix deprecation warning by changing `CLIPFeatureExtractor` to `CLIPImageProcessor`. (#54)',
                    'date': datetime.datetime(2023, 5, 15, 21, 41, 59, tzinfo=datetime.timezone.utc)
                },
                security={
                    'safe': True,
                    'av_scan': {'virusFound': False, 'virusNames': None},
                    'pickle_import_scan': None
                }
            )
            ...
        ]
        ```
    """
    # Any falsy repo type (None or "") falls back to the default "model" type.
    resolved_type = repo_type or constants.REPO_TYPE_MODEL

    # An explicit revision is fully percent-encoded; the default revision is used verbatim.
    if revision is None:
        revision_segment = constants.DEFAULT_REVISION
    else:
        revision_segment = quote(revision, safe="")

    auth_headers = self._build_hf_headers(token=token)

    # An empty or missing sub-path targets the root tree, so no extra URL segment is added.
    if path_in_repo:
        subtree_segment = "/" + quote(path_in_repo, safe="")
    else:
        subtree_segment = ""

    tree_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/tree/{revision_segment}{subtree_segment}"
    query = {"recursive": recursive, "expand": expand}

    for entry in paginate(path=tree_url, headers=auth_headers, params=query):
        # Anything that is not explicitly a "file" is treated as a folder.
        if entry["type"] == "file":
            yield RepoFile(**entry)
        else:
            yield RepoFolder(**entry)
@validate_hf_hub_args
def verify_repo_checksums(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    revision: str | None = None,
    local_dir: str | Path | None = None,
    cache_dir: str | Path | None = None,
    token: str | bool | None = None,
) -> FolderVerification:
    """
    Verify local files for a repo against Hub checksums.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the tree. Defaults to `"main"` branch.
        local_dir (`str` or `Path`, *optional*):
            The local directory to verify. Mutually exclusive with `cache_dir`.
        cache_dir (`str` or `Path`, *optional*):
            The cache directory to verify. Mutually exclusive with `local_dir`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`FolderVerification`]: a structured result containing the verification details.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If both `local_dir` and `cache_dir` are passed.
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
    """
    # Exactly one local location may be targeted.
    if not (local_dir is None or cache_dir is None):
        raise ValueError("Pass either `local_dir` or `cache_dir`, not both.")

    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type

    # Normalize the optional locations to `Path` objects before resolving the local root.
    cache_path = None if cache_dir is None else Path(cache_dir)
    local_path = None if local_dir is None else Path(local_dir)
    root, remote_revision = resolve_local_root(
        repo_id=repo_id,
        repo_type=resolved_type,
        revision=revision,
        cache_dir=cache_path,
        local_dir=local_path,
    )
    local_by_path = collect_local_files(root)

    # Index the remote tree by path, keeping files only (folders are not verified).
    remote_tree = self.list_repo_tree(
        repo_id=repo_id, recursive=True, revision=remote_revision, repo_type=resolved_type, token=token
    )
    remote_by_path: dict[str, RepoFile] = {
        entry.path: entry for entry in remote_tree if isinstance(entry, RepoFile)
    }

    return verify_maps(
        remote_by_path=remote_by_path,
        local_by_path=local_by_path,
        revision=remote_revision,
        verified_path=root,
    )
@validate_hf_hub_args
def list_repo_refs(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    include_pull_requests: bool = False,
    token: str | bool | None = None,
) -> GitRefs:
    """
    List the git refs (branches, converts, tags and optionally pull requests) of a repo on the Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"`, `"space"` or `"kernel"` if listing refs from a dataset, a Space or a Kernel,
            `None` or `"model"` if listing from a model. Default is `None`.
        include_pull_requests (`bool`, *optional*):
            Whether to include refs from pull requests in the list. Defaults to `False`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_repo_refs("gpt2")
    GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='e7da7f221d5bf496a48136c0cd264e630fe9fcc8')], converts=[], tags=[])

    >>> api.list_repo_refs("bigcode/the-stack", repo_type='dataset')
    GitRefs(
        branches=[
            GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'),
            GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714')
        ],
        converts=[],
        tags=[
            GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da')
        ]
    )
    ```

    Returns:
        [`GitRefs`]: object containing all information about branches and tags for a
        repo on the Hub. Its `pull_requests` field is `None` unless `include_pull_requests=True`.
    """
    if not repo_type:
        repo_type = constants.REPO_TYPE_MODEL

    session = get_session()
    refs_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/refs"
    request_headers = self._build_hf_headers(token=token)
    query = {"include_prs": 1} if include_pull_requests else {}
    response = session.get(refs_url, headers=request_headers, params=query)
    hf_raise_for_status(response)
    refs_by_kind = response.json()

    def _refs_for(kind: str) -> list[GitRefInfo]:
        # Convert the raw JSON items stored under `kind` into `GitRefInfo` objects.
        return [
            GitRefInfo(name=raw["name"], ref=raw["ref"], target_commit=raw["targetCommit"])
            for raw in refs_by_kind[kind]
        ]

    branches = _refs_for("branches")
    converts = _refs_for("converts")
    tags = _refs_for("tags")
    # The "pullRequests" key is only read when it was explicitly requested.
    pull_requests = _refs_for("pullRequests") if include_pull_requests else None
    return GitRefs(branches=branches, converts=converts, tags=tags, pull_requests=pull_requests)
@validate_hf_hub_args
def list_repo_commits(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    token: bool | str | None = None,
    revision: str | None = None,
    formatted: bool = False,
) -> list[GitCommitInfo]:
    """
    List the commits reachable from a revision of a repo on the Hub.

    Commits are sorted by date (last commit first).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if
            listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        revision (`str`, *optional*):
            The git revision to list commits from. Defaults to the head of the `"main"` branch.
        formatted (`bool`):
            Whether to return the HTML-formatted title and description of the commits. Defaults to False.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Commits are sorted by date (last commit first)
    >>> initial_commit = api.list_repo_commits("gpt2")[-1]

    # Initial commit is always a system commit containing the `.gitattributes` file.
    >>> initial_commit
    GitCommitInfo(
        commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8',
        authors=['system'],
        created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc),
        title='initial commit',
        message='',
        formatted_title=None,
        formatted_message=None
    )

    # Create an empty branch by deriving from initial commit
    >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id)
    ```

    Returns:
        list[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
    """
    if not repo_type:
        repo_type = constants.REPO_TYPE_MODEL
    # The revision ends up in the URL path, so it is fully percent-encoded (including "/").
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")

    pages = paginate(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}",
        headers=self._build_hf_headers(token=token),
        params={"expand[]": "formatted"} if formatted else {},
    )

    # Walk every page and convert each raw item into a `GitCommitInfo`.
    commits = []
    for raw_commit in pages:
        commits.append(
            GitCommitInfo(
                commit_id=raw_commit["id"],
                authors=[author["user"] for author in raw_commit["authors"]],
                created_at=parse_datetime(raw_commit["date"]),
                title=raw_commit["title"],
                message=raw_commit["message"],
                # "formatted" is only looked up defensively: it may be absent from the item.
                formatted_title=raw_commit.get("formatted", {}).get("title"),
                formatted_message=raw_commit.get("formatted", {}).get("message"),
            )
        )
    return commits
@validate_hf_hub_args
def get_paths_info(
    self,
    repo_id: str,
    paths: list[str] | str,
    *,
    expand: bool = False,
    revision: str | None = None,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> list[RepoFile | RepoFolder]:
    """
    Fetch information about one or several paths of a repo.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        paths (`Union[list[str], str]`):
            The paths to get information about. A single string is treated as a one-element list.
            If a path does not exist, it is ignored without raising an exception.
        expand (`bool`, *optional*, defaults to `False`):
            Whether to fetch more information about the paths (e.g. last commit and files' security scan results). This
            operation is more expensive for the server so only 50 results are returned per page (instead of 1000).
            As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it
            takes to get the results.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information. Defaults to `"main"` branch.
        repo_type (`str`, *optional*):
            The type of the repository from which to get the information (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `list[Union[RepoFile, RepoFolder]]`:
            The information about the paths, as a list of [`RepoFile`] and [`RepoFolder`] objects.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.

    Example:
    ```py
    >>> from huggingface_hub import get_paths_info
    >>> paths_info = get_paths_info("allenai/c4", ["README.md", "en"], repo_type="dataset")
    >>> paths_info
    [
        RepoFile(path='README.md', size=2379, blob_id='f84cb4c97182890fc1dbdeaf1a6a468fd27b4fff', lfs=None, last_commit=None, security=None),
        RepoFolder(path='en', tree_id='dc943c4c40f53d02b31ced1defa7e5f438d5862e', last_commit=None)
    ]
    ```
    """
    if not repo_type:
        repo_type = constants.REPO_TYPE_MODEL
    # The revision is part of the URL path, so every reserved character (including "/") is escaped.
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")
    headers = self._build_hf_headers(token=token)

    session = get_session()
    endpoint_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}"
    # Normalize a single path into a list so the request body always carries a list.
    requested_paths = paths if isinstance(paths, list) else [paths]
    response = session.post(
        endpoint_url,
        data={"paths": requested_paths, "expand": expand},
        headers=headers,
    )
    hf_raise_for_status(response)

    # Each returned item carries a "type" discriminator: "file" maps to RepoFile, anything else to RepoFolder.
    results = []
    for raw_info in response.json():
        if raw_info["type"] == "file":
            results.append(RepoFile(**raw_info))
        else:
            results.append(RepoFolder(**raw_info))
    return results
@validate_hf_hub_args
def super_squash_history(
    self,
    repo_id: str,
    *,
    branch: str | None = None,
    commit_message: str | None = None,
    repo_type: str | None = None,
    token: str | bool | None = None,
) -> None:
    """Squash the whole commit history of a branch into a single commit for a repo on the Hub.

    Squashing the repo history is useful when you know you'll make hundreds of commits and you don't want to
    clutter the history. Squashing commits can only be performed from the head of a branch.

    > [!WARNING]
    > Once squashed, the commit history cannot be retrieved. This is a non-revertible operation.

    > [!WARNING]
    > Once the history of a branch has been squashed, it is not possible to merge it back into another branch since
    > their history will have diverged.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        branch (`str`, *optional*):
            The branch to squash. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The commit message to use for the squashed commit. When omitted (or empty), a default message
            mentioning the branch name is used.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if squashing the history of a dataset or a Space, `None` or `"model"`
            for a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `repo_type` is not one of the values in `constants.REPO_TYPES`.
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If the branch to squash cannot be found.
        [`~utils.BadRequestError`]:
            If invalid reference for a branch. You cannot squash history on tags.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Create repo
    >>> repo_id = api.create_repo("test-squash").repo_id

    # Make a lot of commits.
    >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"content")
    >>> api.upload_file(repo_id=repo_id, path_in_repo="lfs.bin", path_or_fileobj=b"content")
    >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"another_content")

    # Squash history
    >>> api.super_squash_history(repo_id=repo_id)
    ```
    """
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")
    branch = constants.DEFAULT_REVISION if branch is None else branch

    # The branch name goes into the URL path and is therefore fully percent-encoded.
    encoded_branch = quote(branch, safe="")
    url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{encoded_branch}"
    headers = self._build_hf_headers(token=token)
    if not commit_message:
        commit_message = f"Super-squash branch '{branch}' using huggingface_hub"

    squash_response = get_session().post(url=url, headers=headers, json={"message": commit_message})
    hf_raise_for_status(squash_response)
@validate_hf_hub_args
def list_lfs_files(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> Iterable[LFSFileInfo]:
    """
    Lazily iterate over every LFS file stored in a repo on the Hub.

    This is primarily useful to count how much storage a repo is using and to eventually clean up large files
    with [`permanently_delete_lfs_files`]. Note that this would be a permanent action that will affect all commits
    referencing the deleted files and that cannot be undone.

    Args:
        repo_id (`str`):
            The repository for which you are listing LFS files.
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or
            `"model"` if listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[LFSFileInfo]`: An iterator of [`LFSFileInfo`] objects.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

    # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
    # e.g. select only LFS files in the "checkpoints" folder
    >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

    # Permanently delete LFS files
    >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
    ```
    """
    # This is a generator: nothing below runs until the first item is requested.
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files"
    headers = self._build_hf_headers(token=token)

    # Wrap each raw paginated item into an `LFSFileInfo` as it is consumed.
    yield from (LFSFileInfo(**raw_item) for raw_item in paginate(url, params={}, headers=headers))
@validate_hf_hub_args
def permanently_delete_lfs_files(
    self,
    repo_id: str,
    lfs_files: Iterable[LFSFileInfo],
    *,
    rewrite_history: bool = True,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Permanently remove LFS files from a repo on the Hub.

    > [!WARNING]
    > This is a permanent action that will affect all commits referencing the deleted files and might corrupt your
    > repository. This is a non-revertible operation. Use it only if you know what you are doing.

    Args:
        repo_id (`str`):
            The repository from which the LFS files are deleted.
        lfs_files (`Iterable[LFSFileInfo]`):
            An iterable of [`LFSFileInfo`] items to permanently delete from the repo. Use [`list_lfs_files`] to list
            all LFS files from a repo.
        rewrite_history (`bool`, *optional*, default to `True`):
            Whether to rewrite repository history to remove file pointers referencing the deleted LFS files (recommended).
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if deleting from a dataset or space, `None` or
            `"model"` if deleting from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

    # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
    # e.g. select only LFS files in the "checkpoints" folder
    >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

    # Permanently delete LFS files
    >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
    ```
    """
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files/batch"
    headers = self._build_hf_headers(token=token)

    # One request is sent per group of at most 1000 files.
    for group in chunk_iterable(lfs_files, 1000):
        oids = [lfs_file.file_oid for lfs_file in group]
        if not oids:
            # An empty group ends the whole operation, not just this iteration.
            return
        deletions = {"sha": oids, "rewriteHistory": rewrite_history}
        response = get_session().post(url, headers=headers, json={"deletions": deletions})
        hf_raise_for_status(response)
@_deprecate_arguments(
    version="2.0",
    deprecated_args={"space_storage"},
    custom_message="Use `space_volumes` to mount volumes on a Space.",
)
@validate_hf_hub_args
def create_repo(
    self,
    repo_id: str,
    *,
    token: str | bool | None = None,
    private: bool | None = None,
    visibility: RepoVisibility_T | None = None,
    repo_type: str | None = None,
    exist_ok: bool = False,
    resource_group_id: str | None = None,
    space_sdk: str | None = None,
    space_hardware: SpaceHardware | None = None,
    space_storage: SpaceStorage | None = None,
    space_sleep_time: int | None = None,
    space_secrets: list[dict[str, str]] | None = None,
    space_variables: list[dict[str, str]] | None = None,
    space_volumes: list[Volume] | None = None,
) -> RepoUrl:
    """Create an empty repo on the HuggingFace Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the
            organization's default is private. This value is ignored if the repo already exists. Cannot be
            passed together with `visibility`.
        visibility (`Literal["public", "private", "protected"]`, *optional*):
            Visibility of the repo. Can be `"public"` or `"private"`, or `"protected"` for Spaces. If `None`
            (default), the repo will be public unless the organization's default is private. This value is ignored
            if the repo already exists.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists.
        resource_group_id (`str`, *optional*):
            Resource group in which to create the repo. Resource groups is only available for Enterprise Hub organizations and
            allow to define which members of the organization can access the resource. The ID of a resource group
            can be found in the URL of the resource's page on the Hub (e.g. `"66670e5163145ca562cb1988"`).
            To learn more about resource groups, see https://huggingface.co/docs/hub/en/security-resource-groups.
        space_sdk (`str`, *optional*):
            Choice of SDK to use if repo_type is "space". Can be "streamlit", "gradio", "docker", or "static".
        space_hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware if repo_type is "space". See [`SpaceHardware`] for a complete list.
        space_storage (`SpaceStorage` or `str`, *optional*):
            <Deprecated, use `space_volumes` to mount volumes on a Space> Choice of persistent storage tier.
            Example: `"small"`. See [`SpaceStorage`] for a complete list.
        space_sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        space_secrets (`list[dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        space_variables (`list[dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.
        space_volumes (`list[Volume]`, *optional*):
            A list of [`Volume`] objects to mount in the Space at creation time. Each volume has a `type`
            (`"bucket"`, `"model"`, `"dataset"`, or `"space"`), a `source` (repo or bucket ID), a `mount_path`
            (path inside the container), and optional `revision`, `read_only`, and `path` fields.
            Only applicable if repo_type is "space".

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `repo_type` is invalid, or if `repo_type` is `"space"` and `space_sdk` is missing or invalid.
        [`~utils.HfHubHTTPError`]:
            If the creation request fails and the failure is not covered by `exist_ok=True`.
    """
    organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id)

    path = f"{self.endpoint}/api/repos/create"

    if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
        raise ValueError("Invalid repo type")

    resolved_visibility = _resolve_repo_visibility(private=private, visibility=visibility, repo_type=repo_type)

    payload: dict[str, Any] = {"name": name, "organization": organization}
    if resolved_visibility is not None:
        payload["visibility"] = resolved_visibility
    if repo_type is not None:
        payload["type"] = repo_type

    # A Space cannot be created without a valid SDK.
    if repo_type == "space":
        if space_sdk is None:
            raise ValueError(
                "No space_sdk provided. `create_repo` expects space_sdk to be one"
                f" of {constants.SPACES_SDK_TYPES} when repo_type is 'space'`"
            )
        if space_sdk not in constants.SPACES_SDK_TYPES:
            raise ValueError(f"Invalid space_sdk. Please choose one of {constants.SPACES_SDK_TYPES}.")
        payload["sdk"] = space_sdk

    if space_sdk is not None and repo_type != "space":
        warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.")

    space_args: list[tuple[str, str, Any]] = [
        # input arg, payload key, value
        ("space_hardware", "hardware", space_hardware),
        ("space_storage", "storageTier", space_storage),
        ("space_sleep_time", "sleepTimeSeconds", space_sleep_time),
        ("space_secrets", "secrets", space_secrets),
        ("space_variables", "variables", space_variables),
        ("space_volumes", "volumes", [v.to_dict() for v in space_volumes] if space_volumes else None),
    ]

    if repo_type == constants.REPO_TYPE_SPACE:
        # Only forward the Space options that were explicitly provided.
        for _, key, value in space_args:
            if value is not None:
                payload[key] = value

        if space_sleep_time is not None and space_hardware == SpaceHardware.CPU_BASIC:
            warnings.warn(
                "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
                " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
                " you want to set a custom sleep time, you need to upgrade to a paid Hardware.",
                UserWarning,
            )
    else:
        # Space-only options are dropped (with a warning) for every other repo type.
        if provided_space_args := [arg for arg, _, value in space_args if value is not None]:
            warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.")

    if resource_group_id is not None:
        payload["resourceGroupId"] = resource_group_id

    headers = self._build_hf_headers(token=token)
    while True:
        r = get_session().post(path, headers=headers, json=payload)
        if r.status_code == 409 and "Cannot create repo: another conflicting operation is in progress" in r.text:
            # Since https://github.com/huggingface/moon-landing/pull/7272 (private repo), it is not possible to
            # concurrently create repos on the Hub for a same user. This is rarely an issue, except when running
            # tests. To avoid any inconvenience, we retry to create the repo for this specific error.
            # NOTE: This could have been fixed directly in the tests but adding it here should fix CIs for all
            # dependent libraries.
            # NOTE: If a fix is implemented server-side, we should be able to remove this retry mechanism.
            # NOTE(review): the retry is unbounded and has no delay between attempts.
            logger.debug("Create repo failed due to a concurrency issue. Retrying...")
            continue
        break

    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as err:
        if exist_ok and err.response.status_code == 409:
            # Repo already exists and `exist_ok=True`: fall through and read the URL from the response body.
            pass
        elif exist_ok and err.response.status_code == 403:
            # No write permission on the namespace but repo might already exist
            try:
                self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
                # Pass `endpoint` explicitly, exactly like the regular return below, so that the returned
                # `RepoUrl` is built against this client's endpoint rather than the default one.
                if repo_type is None or repo_type == constants.REPO_TYPE_MODEL:
                    return RepoUrl(f"{self.endpoint}/{repo_id}", endpoint=self.endpoint)
                return RepoUrl(
                    f"{self.endpoint}/{constants.REPO_TYPES_URL_PREFIXES[repo_type]}{repo_id}",
                    endpoint=self.endpoint,
                )
            except HfHubHTTPError:
                # The repo is not reachable either: surface the original creation error.
                raise err
        else:
            raise

    d = r.json()
    return RepoUrl(d["url"], endpoint=self.endpoint)
@validate_hf_hub_args
def delete_repo(
    self,
    repo_id: str,
    *,
    token: str | bool | None = None,
    repo_type: str | None = None,
    missing_ok: bool = False,
) -> None:
    """
    Remove a repo from the HuggingFace Hub. CAUTION: this is irreversible.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if deleting a dataset or
            space, `None` or `"model"` if deleting a model.
        missing_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo does not exist.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `repo_type` is not a supported repo type.
        [`~utils.RepositoryNotFoundError`]
            If the repository to delete from cannot be found and `missing_ok` is set to False (default).
    """
    # Split "namespace/name"; a bare name has no organization.
    if "/" in repo_id:
        organization, name = repo_id.split("/")
    else:
        organization, name = None, repo_id

    delete_url = f"{self.endpoint}/api/repos/delete"

    if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
        raise ValueError("Invalid repo type")

    payload = {"name": name, "organization": organization}
    if repo_type is not None:
        payload["type"] = repo_type

    headers = self._build_hf_headers(token=token)
    response = get_session().request("DELETE", delete_url, headers=headers, json=payload)
    # The cached xet connection info for this repo is reset before the response status is checked.
    reset_xet_connection_info_cache_for_repo(repo_type, repo_id)
    try:
        hf_raise_for_status(response)
    except RepositoryNotFoundError:
        if missing_ok:
            return
        raise
@validate_hf_hub_args
def update_repo_settings(
    self,
    repo_id: str,
    *,
    gated: Literal["auto", "manual", False] | None = None,
    private: bool | None = None,
    visibility: RepoVisibility_T | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
) -> None:
    """
    Change the settings of a repository: its gated-access mode and/or its visibility.

    To give more control over how repos are used, the Hub allows repo authors to enable
    access requests for their repos, and also to change the visibility of the repo.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a /.
        gated (`Literal["auto", "manual", False]`, *optional*):
            The gated status for the repository. If set to `None` (default), the `gated` setting of the repository won't be updated.
            * "auto": The repository is gated, and access requests are automatically approved or denied based on predefined criteria.
            * "manual": The repository is gated, and access requests require manual approval.
            * False : The repository is not gated, and anyone can access it.
        private (`bool`, *optional*):
            Whether the repository should be private. Cannot be passed together with `visibility`.
        visibility (`Literal["public", "private", "protected"]`, *optional*):
            Visibility of the repository. Can be `"public"` or `"private"`, or `"protected"` for Spaces.
        token (`Union[str, bool, None]`, *optional*):
            A valid user access token (string). Defaults to the locally saved token,
            which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass False.
        repo_type (`str`, *optional*):
            The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If gated is not one of "auto", "manual", or False.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If repo_type is not one of the values in constants.REPO_TYPES.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If no setting is provided to update.
        [`~utils.HfHubHTTPError`]:
            If the request to the Hugging Face Hub API fails.
        [`~utils.RepositoryNotFoundError`]
            If the repository cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
    """
    # Validate before defaulting, so the check runs on the value the caller actually passed.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type  # default repo type

    resolved_visibility = _resolve_repo_visibility(private=private, visibility=visibility, repo_type=repo_type)

    # Collect only the settings that were explicitly requested.
    settings: dict = {}
    if gated is not None:
        if gated not in ("auto", "manual", False):
            raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. Got '{gated}'.")
        settings["gated"] = gated
    if resolved_visibility is not None:
        settings["visibility"] = resolved_visibility

    if not settings:
        raise ValueError("At least one setting must be updated.")

    request_headers = self._build_hf_headers(token=token)
    response = get_session().put(
        url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings",
        headers=request_headers,
        json=settings,
    )
    hf_raise_for_status(response)
def move_repo(
    self,
    from_id: str,
    to_id: str,
    *,
    repo_type: str | None = None,
    token: str | bool | None = None,
):
    """
    Move (rename or transfer) a repository from namespace1/repo_name1 to namespace2/repo_name2.

    Note there are certain limitations. For more information about moving
    repositories, please see
    https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo.

    Args:
        from_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`. Original repository identifier.
        to_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`. Final repository identifier.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if moving a dataset or
            space, `None` or `"model"` if moving a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `from_id` or `to_id` is not of the form `namespace/repo_name`.
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to move cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Both identifiers must be exactly "namespace/repo_name"; the source id is checked first.
    for candidate_id in (from_id, to_id):
        if len(candidate_id.split("/")) != 2:
            raise ValueError(
                f"Invalid repo_id: {candidate_id}. It should have a namespace (:namespace:/:repo_name:)"
            )

    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL  # Hub won't accept `None`.

    move_url = f"{self.endpoint}/api/repos/move"
    request_headers = self._build_hf_headers(token=token)
    response = get_session().post(
        move_url,
        headers=request_headers,
        json={"fromRepo": from_id, "toRepo": to_id, "type": repo_type},
    )
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        # Point the user to the documentation before re-raising the same error.
        error.append_to_message(
            "\nFor additional documentation please see"
            " https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo."
        )
        raise
# Typing-only overload: when `run_as_future` is `False` (the default for this signature),
# `create_commit` is typed as returning a `CommitInfo` directly.
@overload
def create_commit(  # type: ignore
    self,
    repo_id: str,
    operations: Iterable[CommitOperation],
    *,
    commit_message: str,
    commit_description: str | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    num_threads: int = 5,
    parent_commit: str | None = None,
    run_as_future: Literal[False] = ...,
    _hot_reload: bool | None = None,
) -> CommitInfo: ...
# Typing-only overload: when `run_as_future` is `True`, `create_commit` is typed as returning
# a `Future[CommitInfo]`.
@overload
def create_commit(
    self,
    repo_id: str,
    operations: Iterable[CommitOperation],
    *,
    commit_message: str,
    commit_description: str | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    num_threads: int = 5,
    parent_commit: str | None = None,
    run_as_future: Literal[True] = ...,
    _hot_reload: bool | None = None,
) -> Future[CommitInfo]: ...
@validate_hf_hub_args
@future_compatible
def create_commit(
    self,
    repo_id: str,
    operations: Iterable[CommitOperation],
    *,
    commit_message: str,
    commit_description: str | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    num_threads: int = 5,
    parent_commit: str | None = None,
    run_as_future: bool = False,
    _hot_reload: bool | None = None,
) -> CommitInfo | Future[CommitInfo]:
    """
    Creates a commit in the given repo, deleting & uploading files as needed.

    > [!WARNING]
    > The input list of `CommitOperation` will be mutated during the commit process. Do not reuse the same objects
    > for multiple commits.

    > [!WARNING]
    > `create_commit` assumes that the repo already exists on the Hub. If you get a
    > Client error 404, please make sure you are authenticated, that your token has the required permissions,
    > and that `repo_id` and `repo_type` are set correctly. If repo does not exist,
    > create it first using [`~hf_api.create_repo`].

    > [!WARNING]
    > `create_commit` is limited to 25k LFS files and a 1GB payload for regular files.

    Args:
        repo_id (`str`):
            The repository in which the commit will be created, for example:
            `"username/custom_transformers"`
        operations (`Iterable` of [`~hf_api.CommitOperation`]):
            An iterable of operations to include in the commit, either:

                - [`~hf_api.CommitOperationAdd`] to upload a file
                - [`~hf_api.CommitOperationDelete`] to delete a file
                - [`~hf_api.CommitOperationCopy`] to copy a file

            Operation objects will be mutated to include information relative to the upload. Do not reuse the
            same objects for multiple commits.
        commit_message (`str`):
            The summary (first line) of the commit that will be created.
        commit_description (`str`, *optional*):
            The description of the commit that will be created. Defaults to an empty string.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        num_threads (`int`, *optional*):
            Number of concurrent threads for uploading files. Defaults to 5.
            Setting it to 2 means at most 2 files will be uploaded concurrently.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string.
            Shorthands (7 first characters) are also supported. If specified and `create_pr` is `False`,
            the commit will fail if `revision` does not point to `parent_commit`. If specified and `create_pr`
            is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit`
            ensures the repo has not changed before committing the changes, and can be especially useful
            if the repo is updated / committed to concurrently.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.
        _hot_reload (`bool`, *optional*):
            Private flag. When truthy, `hot_reload=1` is added to the query parameters of the commit request.
            Defaults to `False`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If every operation turns out to be a no-op, no commit is created and
            the returned [`CommitInfo`] points to the latest commit of the revision instead. If
            `run_as_future=True` is passed, returns a Future object which will contain the result when executed.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If commit message is empty.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If parent commit is not a valid commit OID.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not a supported repo type.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If a [`~hf_api.CommitOperationAdd`] has already been committed in a previous commit.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If a README.md file with an invalid metadata section is committed. In this case, the commit will fail
            early, before trying to upload any file.
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
    """
    # --- Validate and normalize arguments -------------------------------------------------
    if parent_commit is not None:
        if not constants.REGEX_COMMIT_OID.fullmatch(parent_commit):
            raise ValueError(
                f"`parent_commit` is not a valid commit OID. It must match the following regex: {constants.REGEX_COMMIT_OID}"
            )

    if commit_message is None or len(commit_message) < 1:
        raise ValueError("`commit_message` can't be empty, please pass a value.")

    if commit_description is None:
        commit_description = ""
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # The raw revision is handed to first-class methods; the quoted one is only used in the commit URL.
    raw_revision = revision or constants.DEFAULT_REVISION
    quoted_revision = quote(raw_revision, safe="")
    open_pr = False if create_pr is None else create_pr
    hot_reload = False if _hot_reload is None else _hot_reload

    base_headers = self._build_hf_headers(token=token)

    # --- Classify operations --------------------------------------------------------------
    all_operations = list(operations)
    additions = []
    copies = []
    for op in all_operations:
        if isinstance(op, CommitOperationAdd):
            additions.append(op)
        if isinstance(op, CommitOperationCopy):
            copies.append(op)
    # Whatever is neither an addition nor a copy is counted as a deletion.
    nb_deletions = len(all_operations) - len(additions) - len(copies)

    # An addition object carries upload state and must not be part of two commits.
    for candidate in additions:
        if candidate._is_committed:
            raise ValueError(
                f"CommitOperationAdd {candidate} has already being committed and cannot be reused. Please create a"
                " new CommitOperationAdd object if you want to create a new commit."
            )

    # Data files pushed to a non-dataset repo are most likely a mistake: warn, but do not fail.
    if repo_type != "dataset":
        data_file_suffixes = (".arrow", ".parquet")
        for candidate in additions:
            if candidate.path_in_repo.endswith(data_file_suffixes):
                warnings.warn(
                    f"It seems that you are about to commit a data file ({candidate.path_in_repo}) to a {repo_type}"
                    " repository. You are sure this is intended? If you are trying to upload a dataset, please"
                    " set `repo_type='dataset'` or `--repo-type=dataset` in a CLI."
                )

    logger.debug(
        f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and"
        f" {nb_deletions} deletion(s)."
    )

    # If updating a README.md file, make sure the metadata format is valid.
    # It's better to fail early than to fail after all the files have been uploaded.
    # Only the first README.md addition is validated.
    readme_addition = next((op for op in additions if op.path_in_repo == "README.md"), None)
    if readme_addition is not None:
        with readme_addition.as_file() as readme_file:
            readme_content = readme_file.read().decode()
        self._validate_yaml(readme_content, repo_type=repo_type, token=token)

    # If updating twice the same file or update then delete a file in a single commit
    _warn_on_overwriting_operations(all_operations)

    # --- Upload LFS content and resolve copies ----------------------------------------------
    self.preupload_lfs_files(
        repo_id=repo_id,
        additions=additions,
        token=token,
        repo_type=repo_type,
        revision=raw_revision,  # first-class methods take unquoted revision
        create_pr=open_pr,
        num_threads=num_threads,
        free_memory=False,  # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users
    )

    files_to_copy = _fetch_files_to_copy(
        copies=copies,
        repo_type=repo_type,
        repo_id=repo_id,
        headers=base_headers,
        revision=raw_revision,
        endpoint=self.endpoint,
    )

    # --- Detect no-op operations (files that have not changed) ------------------------------
    def _is_no_op(op) -> bool:
        """Return True (and log why) when `op` would not change anything on the Hub."""
        if isinstance(op, CommitOperationAdd):
            if op._remote_oid is not None and op._remote_oid == op._local_oid:
                # File already exists on the Hub and has not changed: we can skip it.
                logger.debug(f"Skipping upload for '{op.path_in_repo}' as the file has not changed.")
                return True
        if isinstance(op, CommitOperationCopy):
            if op._dest_oid is not None and op._dest_oid == op._src_oid:
                # Source and destination files are identical - skip
                logger.debug(
                    f"Skipping copy for '{op.src_path_in_repo}' -> '{op.path_in_repo}' as the content of the source file is the same as the destination file."
                )
                return True
        return False

    effective_operations = [op for op in all_operations if not _is_no_op(op)]
    nb_no_op = len(all_operations) - len(effective_operations)
    if nb_no_op != 0:
        logger.info(f"Removing {nb_no_op} file(s) from commit that have not changed.")

    # --- Return early if empty commit -------------------------------------------------------
    if len(effective_operations) == 0:
        logger.warning("No files have been modified since last commit. Skipping to prevent empty commit.")

        # Get latest commit info
        try:
            info = self.repo_info(repo_id=repo_id, repo_type=repo_type, revision=raw_revision, token=token)
        except RepositoryNotFoundError as error:
            error.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
            raise

        # Return commit info based on latest commit. Non-model repos live under a `/{repo_type}s` prefix.
        if repo_type is None or repo_type == constants.REPO_TYPE_MODEL:
            url_prefix = self.endpoint
        else:
            url_prefix = f"{self.endpoint}/{repo_type}s"
        return CommitInfo(
            commit_url=f"{url_prefix}/{repo_id}/commit/{info.sha}",
            commit_message=commit_message,
            commit_description=commit_description,
            oid=info.sha,  # type: ignore
            _endpoint=self.endpoint,
        )

    # --- Build and send the commit request --------------------------------------------------
    # NOTE: the payload is built from the full list of operations, not from the filtered one.
    commit_payload = _prepare_commit_payload(
        operations=all_operations,
        files_to_copy=files_to_copy,
        commit_message=commit_message,
        commit_description=commit_description,
        parent_commit=parent_commit,
    )
    commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{quoted_revision}"

    request_headers = {
        # See https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        "Content-Type": "application/x-ndjson",
        **base_headers,
    }
    # Serialize the payload as newline-delimited JSON: one JSON document per line.
    ndjson_body = b"".join(json.dumps(item).encode() + b"\n" for item in commit_payload)

    query_flags = (("create_pr", open_pr), ("hot_reload", hot_reload))
    params: dict[str, Any] = {flag: "1" for flag, enabled in query_flags if enabled}

    try:
        commit_resp = get_session().post(
            url=commit_url, headers=request_headers, content=ndjson_body, params=params
        )
        hf_raise_for_status(commit_resp, endpoint_name="commit")
    except RepositoryNotFoundError as error:
        error.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
        raise
    except RemoteEntryNotFoundError as error:
        if nb_deletions > 0 and "A file with this name doesn't exist" in str(error):
            error.append_to_message(
                "\nMake sure to differentiate file and folder paths in delete"
                " operations with a trailing '/' or using `is_folder=True/False`."
            )
        raise

    # Mark additions as committed (cannot be reused in another commit)
    for committed in additions:
        committed._is_committed = True

    commit_data = commit_resp.json()
    return CommitInfo(
        commit_url=commit_data["commitUrl"],
        commit_message=commit_message,
        commit_description=commit_description,
        oid=commit_data["commitOid"],
        pr_url=commit_data["pullRequestUrl"] if open_pr else None,
        _endpoint=self.endpoint,
    )
def preupload_lfs_files(
    self,
    repo_id: str,
    additions: Iterable[CommitOperationAdd],
    *,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    num_threads: int = 5,
    free_memory: bool = True,
    gitignore_content: str | None = None,
):
    """Pre-upload LFS files to S3 in preparation on a future commit.

    This method is useful if you are generating the files to upload on-the-fly and you don't want to store them
    in memory before uploading them all at once.

    > [!WARNING]
    > This is a power-user method. You shouldn't need to call it directly to make a normal commit.
    > Use [`create_commit`] directly instead.

    > [!WARNING]
    > Commit operations will be mutated during the process. In particular, the attached `path_or_fileobj` will be
    > removed after the upload to save memory (and replaced by an empty `bytes` object). Do not reuse the same
    > objects except to pass them to [`create_commit`]. If you don't want to remove the attached content from the
    > commit operation object, pass `free_memory=False`.

    Args:
        repo_id (`str`):
            The repository in which you will commit the files, for example: `"username/custom_transformers"`.
        additions (`Iterable` of [`CommitOperationAdd`]):
            The list of files to upload. Warning: the objects in this list will be mutated to include information
            relative to the upload. Do not reuse the same objects for multiple commits.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            The type of repository to upload to (e.g. `"model"` -default-, `"dataset"` or `"space"`).
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not you plan to create a Pull Request with that commit. Defaults to `False`.
        num_threads (`int`, *optional*):
            Number of concurrent threads for uploading files. Defaults to 5.
            Setting it to 2 means at most 2 files will be uploaded concurrently.
        free_memory (`bool`, *optional*):
            If `True` (default), `path_or_fileobj` of each uploaded addition is replaced by an empty `bytes`
            object once the upload is done.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not a supported repo type.
        [`~utils.RepositoryNotFoundError`]:
            Re-raised with an explanatory message if the repo is not found while fetching upload modes.

    Example:
    ```py
    >>> from huggingface_hub import CommitOperationAdd, preupload_lfs_files, create_commit, create_repo

    >>> repo_id = create_repo("test_preupload").repo_id

    # Generate and preupload LFS files one by one
    >>> operations = [] # List of all `CommitOperationAdd` objects that will be generated
    >>> for i in range(5):
    ...     content = ... # generate binary content
    ...     addition = CommitOperationAdd(path_in_repo=f"shard_{i}_of_5.bin", path_or_fileobj=content)
    ...     preupload_lfs_files(repo_id, additions=[addition]) # upload + free memory
    ...     operations.append(addition)

    # Create commit
    >>> create_commit(repo_id, operations=operations, commit_message="Commit all shards")
    ```
    """
    # --- Normalize arguments ----------------------------------------------------------------
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")
    open_pr = False if create_pr is None else create_pr
    request_headers = self._build_hf_headers(token=token)

    # --- Resolve the gitignore content --------------------------------------------------------
    # An explicitly passed `gitignore_content` wins; otherwise use the first `.gitignore` being committed.
    all_additions = list(additions)
    if gitignore_content is None:
        gitignore_op = next((op for op in all_additions if op.path_in_repo == ".gitignore"), None)
        if gitignore_op is not None:
            with gitignore_op.as_file() as gitignore_file:
                gitignore_content = gitignore_file.read().decode()

    # --- Select what still has to be uploaded -------------------------------------------------
    # Filter out already uploaded files
    pending = [op for op in all_additions if not op._is_uploaded]

    # Check which new files are LFS
    # For some items, we might have already fetched the upload mode (in case of upload_large_folder)
    missing_upload_mode = [op for op in pending if op._upload_mode is None]
    if missing_upload_mode:
        try:
            _fetch_upload_modes(
                additions=missing_upload_mode,
                repo_type=repo_type,
                repo_id=repo_id,
                headers=request_headers,
                revision=revision,
                endpoint=self.endpoint,
                create_pr=open_pr or False,
                gitignore_content=gitignore_content,
            )
        except RepositoryNotFoundError as error:
            error.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
            raise

    # Filter out regular files
    lfs_candidates = [op for op in pending if op._upload_mode == "lfs"]

    # Filter out files listed in .gitignore
    lfs_to_upload = []
    for candidate in lfs_candidates:
        if not candidate._should_ignore:
            lfs_to_upload.append(candidate)
            continue
        logger.debug(f"Skipping upload for LFS file '{candidate.path_in_repo}' (ignored by gitignore file).")

    nb_ignored = len(lfs_candidates) - len(lfs_to_upload)
    if nb_ignored != 0:
        logger.info(f"Skipped upload for {nb_ignored} LFS file(s) (ignored by gitignore file).")

    # If no LFS files remain to upload, keep previous behavior and log explicitly
    if not lfs_to_upload:
        logger.debug("No LFS files to upload.")
        return

    # --- Upload -------------------------------------------------------------------------------
    _upload_files(  # type: ignore [arg-type]
        additions=lfs_to_upload,
        repo_type=repo_type,
        repo_id=repo_id,
        headers=request_headers,
        endpoint=self.endpoint,
        # If `create_pr`, we don't want to check user permission on the revision as users with read permission
        # should still be able to create PRs even if they don't have write permission on the target branch of the
        # PR (i.e. `revision`).
        revision=None if open_pr else revision,
        num_threads=num_threads,
        create_pr=open_pr,
    )

    # Record the upload on each operation and optionally drop the in-memory content.
    for uploaded in lfs_to_upload:
        uploaded._is_uploaded = True
        if free_memory:
            uploaded.path_or_fileobj = b""
# Typing-only overload: when `run_as_future` is `False` (the default for this signature),
# `upload_file` is typed as returning a `CommitInfo` directly.
@overload
def upload_file(  # type: ignore
    self,
    *,
    path_or_fileobj: str | Path | bytes | BinaryIO,
    path_in_repo: str,
    repo_id: str,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
    run_as_future: Literal[False] = ...,
    _hot_reload: bool | None = None,
) -> CommitInfo: ...
# Typing-only overload: when `run_as_future` is `True`, `upload_file` is typed as returning
# a `Future[CommitInfo]`.
@overload
def upload_file(
    self,
    *,
    path_or_fileobj: str | Path | bytes | BinaryIO,
    path_in_repo: str,
    repo_id: str,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
    run_as_future: Literal[True] = ...,
    _hot_reload: bool | None = None,
) -> Future[CommitInfo]: ...
@validate_hf_hub_args
@future_compatible
def upload_file(
    self,
    *,
    path_or_fileobj: str | Path | bytes | BinaryIO,
    path_in_repo: str,
    repo_id: str,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
    run_as_future: bool = False,
    _hot_reload: bool | None = None,
) -> CommitInfo | Future[CommitInfo]:
    """
    Upload a local file (up to 50 GB) to the given repo. The upload is done
    through a HTTP post request, and doesn't require git or git-lfs to be
    installed.

    The file is wrapped in a single [`~hf_api.CommitOperationAdd`] and committed with
    `HfApi.create_commit`.

    Args:
        path_or_fileobj (`str`, `Path`, `bytes`, or `IO`):
            Path to a file on the local machine or binary data stream /
            fileobj / buffer.
        path_in_repo (`str`):
            Relative filepath in the repo, for example:
            `"checkpoints/1fec34a/weights.bin"`
        repo_id (`str`):
            The repository to which the file will be uploaded, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to:
            `f"Upload {path_in_repo} with huggingface_hub"`
        commit_description (`str`, *optional*):
            The description of the generated commit
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.
        _hot_reload (`bool`, *optional*):
            Private flag forwarded as-is to `create_commit`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
            contain the result when executed.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to upload to cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to commit to cannot be found.

    > [!WARNING]
    > `upload_file` assumes that the repo already exists on the Hub. If you get a
    > Client error 404, please make sure you are authenticated, that your token has the required permissions,
    > and that `repo_id` and `repo_type` are set correctly. If repo does not exist,
    > create it first using [`~hf_api.create_repo`].

    Example:

    ```python
    >>> from huggingface_hub import upload_file

    >>> with open("./local/filepath", "rb") as fileobj:
    ...     upload_file(
    ...         path_or_fileobj=fileobj,
    ...         path_in_repo="remote/file/path.h5",
    ...         repo_id="username/my-dataset",
    ...         repo_type="dataset",
    ...         token="my_token",
    ...     )

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ... )

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ...     create_pr=True,
    ... )
    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # Fall back to a generated commit title when the caller did not provide one.
    if commit_message is None:
        commit_message = f"Upload {path_in_repo} with huggingface_hub"

    # A single-file upload is a commit made of exactly one addition.
    single_addition = CommitOperationAdd(path_or_fileobj=path_or_fileobj, path_in_repo=path_in_repo)

    # NOTE(review): `run_as_future` is not referenced in this body; presumably it is consumed by the
    # `future_compatible` decorator — confirm against its definition.
    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[single_addition],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        _hot_reload=_hot_reload,
        parent_commit=parent_commit,
    )
# Typing-only overload: when `run_as_future` is `False` (the default for this signature),
# `upload_folder` is typed as returning a `CommitInfo` directly.
@overload
def upload_folder(  # type: ignore
    self,
    *,
    repo_id: str,
    folder_path: str | Path,
    path_in_repo: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
    allow_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
    delete_patterns: list[str] | str | None = None,
    run_as_future: Literal[False] = ...,
) -> CommitInfo: ...
# Typing-only overload: when `run_as_future` is `True`, `upload_folder` is typed as returning
# a `Future[CommitInfo]`.
@overload
def upload_folder(  # type: ignore
    self,
    *,
    repo_id: str,
    folder_path: str | Path,
    path_in_repo: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
    allow_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
    delete_patterns: list[str] | str | None = None,
    run_as_future: Literal[True] = ...,
) -> Future[CommitInfo]: ...
- @validate_hf_hub_args
- @future_compatible
- def upload_folder(
- self,
- *,
- repo_id: str,
- folder_path: str | Path,
- path_in_repo: str | None = None,
- commit_message: str | None = None,
- commit_description: str | None = None,
- token: str | bool | None = None,
- repo_type: str | None = None,
- revision: str | None = None,
- create_pr: bool | None = None,
- parent_commit: str | None = None,
- allow_patterns: list[str] | str | None = None,
- ignore_patterns: list[str] | str | None = None,
- delete_patterns: list[str] | str | None = None,
- run_as_future: bool = False,
- ) -> CommitInfo | Future[CommitInfo]:
- """
- Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or
- git-lfs to be installed.
- The structure of the folder will be preserved. Files with the same name already present in the repository will
- be overwritten. Others will be left untouched.
- Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters
- accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as
- documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and
- `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded.
- Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for
- `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths
- relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")`
- will delete any remote file under `./experiment/logs/`. Note that the `.gitattributes` file will not be deleted
- even if it matches the patterns.
- Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore`
- file is not taken into account.
- Uses `HfApi.create_commit` under the hood.
- Args:
- repo_id (`str`):
- The repository to which the file will be uploaded, for example:
- `"username/custom_transformers"`
- folder_path (`str` or `Path`):
- Path to the folder to upload on the local file system
- path_in_repo (`str`, *optional*):
- Relative path of the directory in the repo, for example:
- `"checkpoints/1fec34a/results"`. Will default to the root folder of the repository.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- repo_type (`str`, *optional*):
- Set to `"dataset"` or `"space"` if uploading to a dataset or
- space, `None` or `"model"` if uploading to a model. Default is
- `None`.
- revision (`str`, *optional*):
- The git revision to commit from. Defaults to the head of the `"main"` branch.
- commit_message (`str`, *optional*):
- The summary / title / first line of the generated commit. Defaults to:
- `f"Upload {path_in_repo} with huggingface_hub"`
- commit_description (`str` *optional*):
- The description of the generated commit
- create_pr (`boolean`, *optional*):
- Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not
- set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened
- against this branch. If `revision` is set and is not a branch name (example: a commit oid), an
- `RevisionNotFoundError` is returned by the server.
- parent_commit (`str`, *optional*):
- The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
- If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
- If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
- Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
- especially useful if the repo is updated / committed to concurrently.
- allow_patterns (`list[str]` or `str`, *optional*):
- If provided, only files matching at least one pattern are uploaded.
- ignore_patterns (`list[str]` or `str`, *optional*):
- If provided, files matching any of the patterns are not uploaded.
- delete_patterns (`list[str]` or `str`, *optional*):
- If provided, remote files matching any of the patterns will be deleted from the repo while committing
- new files. This is useful if you don't know which files have already been uploaded.
- Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern.
- run_as_future (`bool`, *optional*):
- Whether or not to run this method in the background. Background jobs are run sequentially without
- blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
- object. Defaults to `False`.
- Returns:
- [`CommitInfo`] or `Future`:
- Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
- url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
- contain the result when executed.
- > [!TIP]
- > Raises the following errors:
- >
- > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
- > if the HuggingFace API returned an error
- > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- > if some parameter value is invalid
- > [!WARNING]
- > `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make
- > sure you are authenticated, that your token has the required permissions, and that `repo_id` and `repo_type`
- > are set correctly. If repo does not exist, create it first using [`~hf_api.create_repo`].
- > [!TIP]
- > When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead.
- Example:
- ```python
- # Upload checkpoints folder except the log files
- >>> upload_folder(
- ... folder_path="local/checkpoints",
- ... path_in_repo="remote/experiment/checkpoints",
- ... repo_id="username/my-dataset",
- ... repo_type="datasets",
- ... token="my_token",
- ... ignore_patterns="**/logs/*.txt",
- ... )
- # Upload checkpoints folder including logs while deleting existing logs from the repo
- # Useful if you don't know exactly which log files have already being pushed
- >>> upload_folder(
- ... folder_path="local/checkpoints",
- ... path_in_repo="remote/experiment/checkpoints",
- ... repo_id="username/my-dataset",
- ... repo_type="datasets",
- ... token="my_token",
- ... delete_patterns="**/logs/*.txt",
- ... )
- # Upload checkpoints folder while creating a PR
- >>> upload_folder(
- ... folder_path="local/checkpoints",
- ... path_in_repo="remote/experiment/checkpoints",
- ... repo_id="username/my-dataset",
- ... repo_type="datasets",
- ... token="my_token",
- ... create_pr=True,
- ... )
- ```
- """
- if repo_type not in constants.REPO_TYPES:
- raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
- # By default, upload folder to the root directory in repo.
- if path_in_repo is None:
- path_in_repo = ""
- # Do not upload .git folder
- if ignore_patterns is None:
- ignore_patterns = []
- elif isinstance(ignore_patterns, str):
- ignore_patterns = [ignore_patterns]
- ignore_patterns += DEFAULT_IGNORE_PATTERNS
- delete_operations = self._prepare_folder_deletions(
- repo_id=repo_id,
- repo_type=repo_type,
- revision=constants.DEFAULT_REVISION if create_pr else revision,
- token=token,
- path_in_repo=path_in_repo,
- delete_patterns=delete_patterns,
- )
- add_operations = self._prepare_upload_folder_additions(
- folder_path,
- path_in_repo,
- allow_patterns=allow_patterns,
- ignore_patterns=ignore_patterns,
- token=token,
- repo_type=repo_type,
- )
- # Optimize operations: if some files will be overwritten, we don't need to delete them first
- if len(add_operations) > 0:
- added_paths = {op.path_in_repo for op in add_operations}
- delete_operations = [
- delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths
- ]
- commit_operations = delete_operations + add_operations
- commit_message = commit_message or "Upload folder using huggingface_hub"
- return self.create_commit(
- repo_type=repo_type,
- repo_id=repo_id,
- operations=commit_operations,
- commit_message=commit_message,
- commit_description=commit_description,
- token=token,
- revision=revision,
- create_pr=create_pr,
- parent_commit=parent_commit,
- )
@validate_hf_hub_args
def delete_file(
    self,
    path_in_repo: str,
    repo_id: str,
    *,
    token: str | bool | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
) -> CommitInfo:
    """
    Delete a single file from a repo on the Hub.

    The deletion is submitted through [`create_commit`] as a commit holding exactly one
    [`CommitOperationDelete`].

    Args:
        path_in_repo (`str`):
            Path of the file relative to the repo root, for example:
            `"checkpoints/1fec34a/weights.bin"`
        repo_id (`str`):
            The repository from which the file will be deleted, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the file is in a dataset or
            space, `None` or `"model"` if in a model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. When `None`, defaults to
            `f"Delete {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are
            also supported. If specified and `create_pr` is `False`, the commit will fail if `revision` does
            not point to `parent_commit`. If specified and `create_pr` is `True`, the pull request will be
            created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before
            committing the changes, and can be especially useful if the repo is updated / committed to
            concurrently.

    Returns:
        [`CommitInfo`]: The value returned by [`create_commit`] for the generated commit.

    > [!TIP]
    > Raises the following errors:
    >
    > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >   if the HuggingFace API returned an error
    > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >   if some parameter value is invalid
    > - [`~utils.RepositoryNotFoundError`]
    >   If the repository cannot be found. This may be because it doesn't exist,
    >   or because it is set to `private` and you do not have access.
    > - [`~utils.RevisionNotFoundError`]
    >   If the revision cannot be found.
    > - [`~utils.EntryNotFoundError`]
    >   If the file to delete cannot be found.
    """
    # Only `None` triggers the default: an explicit (even empty) message from the caller is forwarded as-is.
    if commit_message is None:
        commit_message = f"Delete {path_in_repo} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[CommitOperationDelete(path_in_repo=path_in_repo)],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
@validate_hf_hub_args
def delete_files(
    self,
    repo_id: str,
    delete_patterns: list[str],
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
) -> CommitInfo:
    """
    Delete files from a repository on the Hub.

    If a folder path is provided, the entire folder is deleted as well as
    all files it contained.

    The delete operations are first computed by `_prepare_folder_deletions` from `delete_patterns`
    (relative to the repo root), then submitted in a single commit through [`create_commit`].

    Args:
        repo_id (`str`):
            The repository from which the files will be deleted, for example:
            `"username/custom_transformers"`
        delete_patterns (`list[str]`):
            List of files or folders to delete. Each string can either be
            a file path, a folder path, or a wildcard pattern. Patterns are Standard
            Wildcards (globbing patterns) as documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm).
            The pattern matching is based on [`fnmatch`](https://docs.python.org/3/library/fnmatch.html).
            Note that `fnmatch` matches `*` across path boundaries, unlike traditional Unix shell globbing.
            E.g. `["file.txt", "folder/", "data/*.parquet"]`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
            The same value is forwarded both when preparing the delete operations and when creating
            the commit.
        repo_type (`str`, *optional*):
            Type of the repo to delete files from. Can be `"model"`,
            `"dataset"` or `"space"`. Defaults to `"model"`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary (first line) of the generated commit. When `None`, defaults to
            `f"Delete files {' '.join(delete_patterns)} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are
            also supported. If specified and `create_pr` is `False`, the commit will fail if `revision` does
            not point to `parent_commit`. If specified and `create_pr` is `True`, the pull request will be
            created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before
            committing the changes, and can be especially useful if the repo is updated / committed to
            concurrently.

    Returns:
        [`CommitInfo`]: The value returned by [`create_commit`] for the generated commit.
    """
    # Forward `token` here as well (as `upload_folder` does): otherwise an explicitly passed token
    # would be ignored while preparing the deletions and only used for the commit itself.
    operations = self._prepare_folder_deletions(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        token=token,
        path_in_repo="",  # patterns are evaluated from the repo root
        delete_patterns=delete_patterns,
    )

    if commit_message is None:
        commit_message = f"Delete files {' '.join(delete_patterns)} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        token=token,
        operations=operations,
        revision=revision,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
@validate_hf_hub_args
def delete_folder(
    self,
    path_in_repo: str,
    repo_id: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    commit_message: str | None = None,
    commit_description: str | None = None,
    create_pr: bool | None = None,
    parent_commit: str | None = None,
) -> CommitInfo:
    """
    Delete a folder from a repo on the Hub.

    Thin wrapper around [`create_commit`]: the commit holds a single [`CommitOperationDelete`]
    built with `is_folder=True`.

    Args:
        path_in_repo (`str`):
            Relative folder path in the repo, for example: `"checkpoints/1fec34a"`.
        repo_id (`str`):
            The repository from which the folder will be deleted, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the folder is in a dataset or
            space, `None` or `"model"` if in a model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. When `None`, defaults to
            `f"Delete folder {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are
            also supported. If specified and `create_pr` is `False`, the commit will fail if `revision` does
            not point to `parent_commit`. If specified and `create_pr` is `True`, the pull request will be
            created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before
            committing the changes, and can be especially useful if the repo is updated / committed to
            concurrently.

    Returns:
        [`CommitInfo`]: The value returned by [`create_commit`] for the generated commit.
    """
    # Only `None` triggers the default message; any caller-provided string is kept untouched.
    if commit_message is None:
        commit_message = f"Delete folder {path_in_repo} with huggingface_hub"

    # `is_folder=True` marks the path as a folder rather than a single file.
    folder_deletion = CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[folder_deletion],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
def upload_large_folder(
    self,
    repo_id: str,
    folder_path: str | Path,
    *,
    repo_type: str,  # Repo type is required!
    revision: str | None = None,
    private: bool | None = None,
    allow_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
    num_workers: int | None = None,
    print_report: bool = True,
    print_report_every: int = 60,
) -> None:
    """Upload a large folder to the Hub in the most resilient way possible.

    This method only forwards its arguments (plus the current client) to `upload_large_folder_internal`,
    which implements the process described below.

    Several workers are started to upload files in an optimized way. Before being committed to a repo, files
    must be hashed and, if they are LFS files, pre-uploaded. Workers perform these tasks for each file in the
    folder. At each step, some metadata about the upload process is saved in the folder under
    `.cache/.huggingface/` so that the process can be resumed if interrupted. The whole process might result
    in several commits.

    Args:
        repo_id (`str`):
            The repository to which the files will be uploaded.
            E.g. `"HuggingFaceTB/smollm-corpus"`.
        folder_path (`str` or `Path`):
            Path to the folder to upload on the local file system.
        repo_type (`str`):
            Type of the repository. Must be one of `"model"`, `"dataset"` or `"space"`.
            Unlike in all other `HfApi` methods, `repo_type` is explicitly required here. This is to avoid
            any mistake when uploading a large folder to the Hub, and therefore prevent from having to
            re-upload everything.
        revision (`str`, *optional*):
            The branch to commit to. If not provided, the `main` branch will be used.
        private (`bool`, *optional*):
            Whether the repository should be private.
            If `None` (default), the repo will be public unless the organization's default is private.
        allow_patterns (`list[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`list[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        num_workers (`int`, *optional*):
            Number of workers to start. Defaults to half of CPU cores (minimum 1).
            A higher number of workers may speed up the process if your machine allows it. However, on
            machines with a slower connection, it is recommended to keep the number of workers low to ensure
            better resumability. Indeed, partially uploaded files will have to be completely re-uploaded if
            the process is interrupted.
        print_report (`bool`, *optional*):
            Whether to print a report of the upload progress. Defaults to `True`.
            Report is printed to `sys.stdout` every X seconds (60 by default) and overwrites the previous
            report.
        print_report_every (`int`, *optional*):
            Frequency at which the report is printed. Defaults to 60 seconds.

    > [!TIP]
    > A few things to keep in mind:
    > - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations
    > - Do not start several processes in parallel.
    > - You can interrupt and resume the process at any time.
    > - Do not upload the same folder to several repositories. If you need to do so, you must delete the local `.cache/.huggingface/` folder first.

    > [!WARNING]
    > While being much more robust to upload large folders, `upload_large_folder` is more limited than [`upload_folder`] feature-wise. In practice:
    > - you cannot set a custom `path_in_repo`. If you want to upload to a subfolder, you need to set the proper structure locally.
    > - you cannot set a custom `commit_message` and `commit_description` since multiple commits are created.
    > - you cannot delete from the repo while uploading. Please make a separate commit first.
    > - you cannot create a PR directly. Please create a PR first (from the UI or using [`create_pull_request`]) and then commit to it by passing `revision`.

    **Technical details:**

    The `upload_large_folder` process is as follows:
        1. (Check parameters and setup.)
        2. Create repo if missing.
        3. List local files to upload.
        4. Run validation checks and display warnings if repository limits might be exceeded:
            - Warns if the total number of files exceeds 100k (recommended limit).
            - Warns if any folder contains more than 10k files (recommended limit).
            - Warns about files larger than 20GB (recommended) or 50GB (hard limit).
        5. Start workers. Workers can perform the following tasks:
            - Hash a file.
            - Get upload mode (regular or LFS) for a list of files.
            - Pre-upload an LFS file.
            - Commit a bunch of files.
        Once a worker finishes a task, it will move on to the next task based on the priority list (see
        below) until all files are uploaded and committed.
        6. While workers are up, regularly print a report to sys.stdout.

    Order of priority:
        1. Commit if more than 5 minutes since last commit attempt (and at least 1 file).
        2. Commit if at least 150 files are ready to commit.
        3. Get upload mode if at least 10 files have been hashed.
        4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading.
        5. Hash file if at least 1 file and no worker is hashing.
        6. Get upload mode if at least 1 file and no worker is getting upload mode.
        7. Pre-upload LFS file if at least 1 file.
        8. Hash file if at least 1 file to hash.
        9. Get upload mode if at least 1 file to get upload mode.
        10. Commit if at least 1 file to commit and at least 1 min since last commit attempt.
        11. Commit if at least 1 file to commit and all other queues are empty.

    Special rules:
        - Only one worker can commit at a time.
        - If no tasks are available, the worker waits for 10 seconds before checking again.
    """
    # Every user-facing option is passed through unchanged; the client itself goes first, positionally.
    forwarded_options = {
        "repo_id": repo_id,
        "folder_path": folder_path,
        "repo_type": repo_type,
        "revision": revision,
        "private": private,
        "allow_patterns": allow_patterns,
        "ignore_patterns": ignore_patterns,
        "num_workers": num_workers,
        "print_report": print_report,
        "print_report_every": print_report_every,
    }
    return upload_large_folder_internal(self, **forwarded_options)
@validate_hf_hub_args
def get_hf_file_metadata(
    self,
    *,
    url: str,
    token: bool | str | None = None,
    timeout: float | None = constants.HF_HUB_ETAG_TIMEOUT,
) -> HfFileMetadata:
    """Fetch metadata of a file versioned on the Hub for a given url.

    Delegates to the module-level `get_hf_file_metadata` helper, adding this client's `library_name`,
    `library_version`, `user_agent` and `endpoint` to the call.

    Args:
        url (`str`):
            File url, for example returned by [`hf_hub_url`].
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`. When `None`, the client's own `token` is used.
        timeout (`float`, *optional*, defaults to `constants.HF_HUB_ETAG_TIMEOUT`):
            How many seconds to wait for the server to send metadata before giving up.

    Returns:
        A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash.
    """
    # Compare against `None` rather than using `token or self.token`: `False` is a meaningful value
    # (authentication disabled) and must not be replaced by the client's token.
    effective_token = self.token if token is None else token

    return get_hf_file_metadata(
        url=url,
        token=effective_token,
        timeout=timeout,
        library_name=self.library_name,
        library_version=self.library_version,
        user_agent=self.user_agent,
        endpoint=self.endpoint,
    )
# Typing overload: a regular download (`dry_run=False`, the default) returns the local file path.
@overload
def hf_hub_download(
    self,
    repo_id: str,
    filename: str,
    *,
    subfolder: str | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    cache_dir: str | Path | None = None,
    local_dir: str | Path | None = None,
    force_download: bool = False,
    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
    token: bool | str | None = None,
    local_files_only: bool = False,
    tqdm_class: type[base_tqdm] | None = None,
    dry_run: Literal[False] = False,
) -> str: ...

# Typing overload: `dry_run=True` returns a `DryRunFileInfo` instead of a path.
@overload
def hf_hub_download(
    self,
    repo_id: str,
    filename: str,
    *,
    subfolder: str | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    cache_dir: str | Path | None = None,
    local_dir: str | Path | None = None,
    force_download: bool = False,
    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
    token: bool | str | None = None,
    local_files_only: bool = False,
    tqdm_class: type[base_tqdm] | None = None,
    dry_run: Literal[True],
) -> DryRunFileInfo: ...

@validate_hf_hub_args
def hf_hub_download(
    self,
    repo_id: str,
    filename: str,
    *,
    subfolder: str | None = None,
    repo_type: str | None = None,
    revision: str | None = None,
    cache_dir: str | Path | None = None,
    local_dir: str | Path | None = None,
    force_download: bool = False,
    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
    token: bool | str | None = None,
    local_files_only: bool = False,
    tqdm_class: type[base_tqdm] | None = None,
    dry_run: bool = False,
) -> str | DryRunFileInfo:
    """Download a given file if it's not already present in the local cache.

    This method delegates to `hf_hub_download` from `.file_download`, adding this client's `endpoint`,
    `library_name`, `library_version`, `user_agent` and `headers` to the call.

    The new cache file layout looks like this:
    - The cache directory contains one subfolder per repo_id (namespaced by repo type)
    - inside each repo folder:
        - refs is a list of the latest known revision => commit_hash pairs
        - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on
          whether they're LFS files or not)
        - snapshots contains one subfolder per commit, each "commit" contains the subset of the files
          that have been resolved at that particular commit. Each filename is a symlink to the blob
          at that particular commit.

    ```
    [  96]  .
    └── [ 160]  models--julien-c--EsperBERTo-small
        ├── [ 160]  blobs
        │   ├── [321M]  403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
        │   ├── [ 398]  7cb18dc9bafbfcf74629a4b760af1b160957a83e
        │   └── [1.4K]  d7edf6bd2a681fb0175f7735299831ee1b22b812
        ├── [  96]  refs
        │   └── [  40]  main
        └── [ 128]  snapshots
            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
            │   ├── [  52]  README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
            │   └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
            └── [ 128]  bbc77c8132af1cc5cf678da3f1ddf2de43606d48
                ├── [  52]  README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e
                └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
    ```

    If `local_dir` is provided, the file structure from the repo will be replicated in this location. When
    using this option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at
    the root of `local_dir` to store some metadata related to the downloaded files. While this mechanism is
    not as robust as the main cache-system, it's optimized for regularly pulling the latest version of a
    repository.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        filename (`str`):
            The name of the file in the repo.
        subfolder (`str`, *optional*):
            An optional value corresponding to a folder inside the repository.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        local_dir (`str` or `Path`, *optional*):
            If provided, the downloaded file will be placed under this directory.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether the file should be downloaded even if it already exists in
            the local cache.
        etag_timeout (`float`, *optional*, defaults to `constants.DEFAULT_ETAG_TIMEOUT`):
            When fetching ETag, how many seconds to wait for the server to send
            data before giving up which is passed to `httpx.request`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`. When `None`, the client's own `token` is used.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        tqdm_class (`tqdm`, *optional*):
            If provided, overwrites the default behavior for the progress bar. Passed
            argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
            Defaults to the custom HF progress bar that can be disabled by setting
            `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
        dry_run (`bool`, *optional*, defaults to `False`):
            If `True`, perform a dry run without actually downloading the file. Returns a
            [`DryRunFileInfo`] object containing information about what would be downloaded.

    Returns:
        `str` or [`DryRunFileInfo`]:
            - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
            - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.

    Raises:
        [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`~utils.RevisionNotFoundError`]
            If the revision to download from cannot be found.
        [`~utils.RemoteEntryNotFoundError`]
            If the file to download cannot be found.
        [`~utils.LocalEntryNotFoundError`]
            If network is disabled or unavailable and file is not found in cache.
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If `token=True` but the token cannot be found.
        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
            If ETag cannot be determined.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If some parameter value is invalid.
    """
    # Imported inside the method and aliased so that it is not confused with this method of the same name.
    from .file_download import hf_hub_download as _download_single_file

    # Compare against `None` rather than using `token or self.token`: `False` is a meaningful value
    # (authentication disabled) and must not be replaced by the client's token.
    effective_token = self.token if token is None else token

    return _download_single_file(
        # What to download
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
        repo_type=repo_type,
        revision=revision,
        # Where to store it and how to fetch it
        cache_dir=cache_dir,
        local_dir=local_dir,
        force_download=force_download,
        etag_timeout=etag_timeout,
        local_files_only=local_files_only,
        tqdm_class=tqdm_class,
        dry_run=dry_run,
        # Client-level settings
        token=effective_token,
        endpoint=self.endpoint,
        library_name=self.library_name,
        library_version=self.library_version,
        user_agent=self.user_agent,
        headers=self.headers,
    )
- @overload
- def snapshot_download(
- self,
- repo_id: str,
- *,
- repo_type: str | None = None,
- revision: str | None = None,
- cache_dir: str | Path | None = None,
- local_dir: str | Path | None = None,
- etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
- force_download: bool = False,
- token: bool | str | None = None,
- local_files_only: bool = False,
- allow_patterns: list[str] | str | None = None,
- ignore_patterns: list[str] | str | None = None,
- max_workers: int = 8,
- tqdm_class: type[base_tqdm] | None = None,
- dry_run: Literal[False] = False,
- ) -> str: ...
- @overload
- def snapshot_download(
- self,
- repo_id: str,
- *,
- repo_type: str | None = None,
- revision: str | None = None,
- cache_dir: str | Path | None = None,
- local_dir: str | Path | None = None,
- etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
- force_download: bool = False,
- token: bool | str | None = None,
- local_files_only: bool = False,
- allow_patterns: list[str] | str | None = None,
- ignore_patterns: list[str] | str | None = None,
- max_workers: int = 8,
- tqdm_class: type[base_tqdm] | None = None,
- dry_run: Literal[True],
- ) -> list[DryRunFileInfo]: ...
- @validate_hf_hub_args
- def snapshot_download(
- self,
- repo_id: str,
- *,
- repo_type: str | None = None,
- revision: str | None = None,
- cache_dir: str | Path | None = None,
- local_dir: str | Path | None = None,
- etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
- force_download: bool = False,
- token: bool | str | None = None,
- local_files_only: bool = False,
- allow_patterns: list[str] | str | None = None,
- ignore_patterns: list[str] | str | None = None,
- max_workers: int = 8,
- tqdm_class: type[base_tqdm] | None = None,
- dry_run: bool = False,
- ) -> str | list[DryRunFileInfo]:
- """Download repo files.
- Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
- a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order
- to keep their actual filename relative to that folder. You can also filter which files to download using
- `allow_patterns` and `ignore_patterns`.
- If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
- option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
- to store some metadata related to the downloaded files.While this mechanism is not as robust as the main
- cache-system, it's optimized for regularly pulling the latest version of a repository.
- An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly
- configured. It is also not possible to filter which files to download when cloning a repository using git.
- Args:
- repo_id (`str`):
- A user or an organization name and a repo name separated by a `/`.
- repo_type (`str`, *optional*):
- Set to `"dataset"` or `"space"` if downloading from a dataset or space,
- `None` or `"model"` if downloading from a model. Default is `None`.
- revision (`str`, *optional*):
- An optional Git revision id which can be a branch name, a tag, or a
- commit hash.
- cache_dir (`str`, `Path`, *optional*):
- Path to the folder where cached files are stored.
- local_dir (`str` or `Path`, *optional*):
- If provided, the downloaded files will be placed under this directory.
- etag_timeout (`float`, *optional*, defaults to `10`):
- When fetching ETag, how many seconds to wait for the server to send
- data before giving up which is passed to `httpx.request`.
- force_download (`bool`, *optional*, defaults to `False`):
- Whether the file should be downloaded even if it already exists in the local cache.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- local_files_only (`bool`, *optional*, defaults to `False`):
- If `True`, avoid downloading the file and return the path to the
- local cached file if it exists.
- allow_patterns (`list[str]` or `str`, *optional*):
- If provided, only files matching at least one pattern are downloaded.
- ignore_patterns (`list[str]` or `str`, *optional*):
- If provided, files matching any of the patterns are not downloaded.
- max_workers (`int`, *optional*):
- Number of concurrent threads to download files (1 thread = 1 file download).
- Defaults to 8.
- tqdm_class (`tqdm`, *optional*):
- If provided, overwrites the default behavior for the progress bar. Passed
- argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
- Note that the `tqdm_class` is not passed to each individual download.
- Defaults to the custom HF progress bar that can be disabled by setting
- `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
- dry_run (`bool`, *optional*, defaults to `False`):
- If `True`, perform a dry run without actually downloading the files. Returns a list of
- [`DryRunFileInfo`] objects containing information about what would be downloaded.
- Returns:
- `str` or list of [`DryRunFileInfo`]:
- - If `dry_run=False`: Folder path of the repo snapshot.
- - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.
- Raises:
- [`~utils.RepositoryNotFoundError`]
- If the repository to download from cannot be found. This may be because it doesn't exist,
- or because it is set to `private` and you do not have access.
- [`~utils.RevisionNotFoundError`]
- If the revision to download from cannot be found.
- [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
- If `token=True` and the token cannot be found.
- [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
- ETag cannot be determined.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- if some parameter value is invalid.
- """
- from ._snapshot_download import snapshot_download
- if token is None:
- # Cannot do `token = token or self.token` as token can be `False`.
- token = self.token
- return snapshot_download(
- repo_id=repo_id,
- repo_type=repo_type,
- revision=revision,
- endpoint=self.endpoint,
- cache_dir=cache_dir,
- local_dir=local_dir,
- library_name=self.library_name,
- library_version=self.library_version,
- user_agent=self.user_agent,
- etag_timeout=etag_timeout,
- force_download=force_download,
- token=token,
- local_files_only=local_files_only,
- allow_patterns=allow_patterns,
- ignore_patterns=ignore_patterns,
- max_workers=max_workers,
- tqdm_class=tqdm_class,
- headers=self.headers,
- dry_run=dry_run,
- )
def get_safetensors_metadata(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    revision: str | None = None,
    token: bool | str | None = None,
) -> SafetensorsRepoMetadata:
    """
    Collect the safetensors metadata of a whole repo on the Hub.

    The repo is probed for a single safetensors file first. If that file exists, only its header is parsed.
    Otherwise the repo is probed for a safetensors index file: the index is downloaded and the header of every
    shard listed in its weight map is parsed. If neither file exists, the repo is not a safetensors repo.

    To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a
            model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the
            head of the `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SafetensorsRepoMetadata`]: information related to safetensors repo.

    Raises:
        [`NotASafetensorsRepoError`]
            If the repo is not a safetensors repo i.e. doesn't have either a
            `model.safetensors` or a `model.safetensors.index.json` file.
        [`SafetensorsParsingError`]
            If a safetensors file header couldn't be parsed correctly.

    Example:
        ```py
        # Parse repo with single weights file
        >>> metadata = get_safetensors_metadata("bigscience/bloomz-560m")
        >>> metadata
        SafetensorsRepoMetadata(
            metadata=None,
            sharded=False,
            weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...},
            files_metadata={'model.safetensors': SafetensorsFileMetadata(...)}
        )
        >>> metadata.files_metadata["model.safetensors"].metadata
        {'format': 'pt'}

        # Parse repo with sharded model
        >>> metadata = get_safetensors_metadata("bigscience/bloom")
        Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00,  5.78it/s]
        >>> metadata
        SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...})
        >>> len(metadata.files_metadata)
        72  # All safetensors files have been fetched

        # Parse repo that is not a safetensors repo
        >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5")
        NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files.
        ```
    """
    # Keyword arguments identifying the repo/revision; every Hub call below shares them.
    location = {"repo_id": repo_id, "repo_type": repo_type, "revision": revision, "token": token}
    single_name = constants.SAFETENSORS_SINGLE_FILE
    index_name = constants.SAFETENSORS_INDEX_FILE

    # Case 1: single safetensors file => non-sharded model
    if self.file_exists(filename=single_name, **location):
        single_metadata = self.parse_safetensors_file_metadata(filename=single_name, **location)
        return SafetensorsRepoMetadata(
            metadata=None,
            sharded=False,
            # Every tensor lives in the one and only weights file.
            weight_map=dict.fromkeys(single_metadata.tensors.keys(), single_name),
            files_metadata={single_name: single_metadata},
        )

    # Case 2: multiple safetensors files => sharded with index
    if self.file_exists(filename=index_name, **location):
        index_path = self.hf_hub_download(filename=index_name, **location)
        with open(index_path) as index_fh:
            index = json.load(index_fh)
        weight_map = index.get("weight_map", {})

        # One entry per distinct shard, filled in concurrently by `thread_map`.
        files_metadata = {}

        def _collect(shard_name: str) -> None:
            files_metadata[shard_name] = self.parse_safetensors_file_metadata(filename=shard_name, **location)

        thread_map(
            _collect,
            set(weight_map.values()),  # several tensors usually share a shard => deduplicate
            desc="Parse safetensors files",
            tqdm_class=hf_tqdm,
        )
        return SafetensorsRepoMetadata(
            metadata=index.get("metadata", None),
            sharded=True,
            weight_map=weight_map,
            files_metadata=files_metadata,
        )

    # Case 3: neither file found => not a safetensors repo
    raise NotASafetensorsRepoError(
        f"'{repo_id}' is not a safetensors repo. Couldn't find '{index_name}' or '{single_name}' files."
    )
def parse_safetensors_file_metadata(
    self,
    repo_id: str,
    filename: str,
    *,
    repo_type: str | None = None,
    revision: str | None = None,
    token: bool | str | None = None,
) -> SafetensorsFileMetadata:
    """
    Parse metadata from a safetensors file on the Hub.

    Only the header of the file is fetched (using HTTP range requests), never the tensors themselves.

    To parse metadata from all safetensors files in a repo at once, use [`get_safetensors_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        filename (`str`):
            The name of the file in the repo.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a
            model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the
            head of the `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SafetensorsFileMetadata`]: information related to a safetensors file.

    Raises:
        [`SafetensorsParsingError`]:
            If a safetensors file header couldn't be parsed correctly.
    """
    # The first 8 bytes of the file encode the size of the JSON header that immediately follows them.
    size_prefix_len = 8
    # Number of bytes fetched optimistically by the first request. HTTP byte ranges are inclusive on both
    # ends, so `bytes=0-100000` yields 100001 bytes.
    first_fetch_len = 100_001

    url = hf_hub_url(
        repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, endpoint=self.endpoint
    )
    _headers = self._build_hf_headers(token=token)
    context_msg = f"repo '{repo_id}', revision '{revision or constants.DEFAULT_REVISION}'"

    # 1. Fetch first 100kb
    # Empirically, 97% of safetensors files have a metadata size < 100kb (over the top 1000 models on the Hub).
    # We assume fetching 100kb is faster than making 2 GET requests. Therefore we always fetch the first 100kb to
    # avoid the 2nd GET in most cases.
    # See https://github.com/huggingface/huggingface_hub/pull/1855#discussion_r1404286419.
    response = get_session().get(url, headers={**_headers, "range": f"bytes=0-{first_fetch_len - 1}"})
    hf_raise_for_status(response)

    # 2. Parse and validate metadata size using shared helper
    metadata_size = _get_safetensors_metadata_size(response.content[:size_prefix_len], filename, context_msg)

    # 3.a. Get metadata from payload
    # The header is usable from the first response only if prefix + header fit in the fetched window.
    # Comparing `metadata_size` alone against 100000 ignores the 8-byte prefix and silently truncates
    # headers whose size falls in the last few bytes of the window.
    if size_prefix_len + metadata_size <= first_fetch_len:
        metadata_as_bytes = response.content[size_prefix_len : size_prefix_len + metadata_size]
    else:  # 3.b. Request full metadata
        last_byte = size_prefix_len + metadata_size - 1  # inclusive end of the header
        response = get_session().get(url, headers={**_headers, "range": f"bytes={size_prefix_len}-{last_byte}"})
        hf_raise_for_status(response)
        metadata_as_bytes = response.content

    # 4. Parse json header using shared helper
    return _parse_safetensors_header(metadata_as_bytes, filename, context_msg)
- @validate_hf_hub_args
def create_branch(
    self,
    repo_id: str,
    *,
    branch: str,
    revision: str | None = None,
    token: bool | str | None = None,
    repo_type: str | None = None,
    exist_ok: bool = False,
) -> None:
    """
    Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`).
    To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`].

    Args:
        repo_id (`str`):
            The repository in which the branch will be created.
            Example: `"user/my-cool-model"`.

        branch (`str`):
            The name of the branch to create.

        revision (`str`, *optional*):
            The git revision to create the branch from. It can be a branch name or
            the OID/SHA of a commit, as a hexadecimal string. Defaults to the head
            of the `"main"` branch.

        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if creating a branch on a dataset or
            space, `None` or `"model"` if creating a branch on a model. Default is `None`.

        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if branch already exists.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.BadRequestError`]:
            If invalid reference for a branch. Ex: `refs/pr/5` or 'refs/foo/bar'.
        [`~utils.HfHubHTTPError`]:
            If the branch already exists on the repo (error 409) and `exist_ok` is
            set to `False`.
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    # Percent-encode the name for use as a URL path segment only. The raw `branch` is kept intact because
    # it is compared against the ref names returned by `list_repo_refs` further down.
    quoted_branch = quote(branch, safe="")

    # Prepare request
    branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{quoted_branch}"
    headers = self._build_hf_headers(token=token)
    payload = {}
    if revision is not None:
        payload["startingPoint"] = revision

    # Create branch
    response = get_session().post(url=branch_url, headers=headers, json=payload)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as e:
        if exist_ok and e.response.status_code == 409:
            return
        elif exist_ok and e.response.status_code == 403:
            # No write permission on the namespace but branch might already exist
            try:
                refs = self.list_repo_refs(repo_id=repo_id, repo_type=repo_type, token=token)
                # Compare with the *unquoted* name: a quoted name such as `feature%2Fx` would never match.
                if any(branch_ref.name == branch for branch_ref in refs.branches):
                    return  # Branch already exists => do not raise
            except HfHubHTTPError:
                pass  # We raise the original error if the branch does not exist
        raise
- @validate_hf_hub_args
def delete_branch(
    self,
    repo_id: str,
    *,
    branch: str,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> None:
    """
    Remove a branch from a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository in which a branch will be deleted.
            Example: `"user/my-cool-model"`.

        branch (`str`):
            The name of the branch to delete.

        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if deleting a branch from a dataset or
            space, `None` or `"model"` if deleting a branch from a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.HfHubHTTPError`]:
            If trying to delete a protected branch. Ex: `main` cannot be deleted.
        [`~utils.HfHubHTTPError`]:
            If trying to delete a branch that does not exist.
    """
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    # The branch name becomes a single URL path segment, hence `safe=""` (slashes are encoded too).
    encoded_branch = quote(branch, safe="")

    target_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/branch/{encoded_branch}"
    request_headers = self._build_hf_headers(token=token)

    # Issue the deletion and surface any HTTP error
    hf_raise_for_status(get_session().delete(url=target_url, headers=request_headers))
- @validate_hf_hub_args
def create_tag(
    self,
    repo_id: str,
    *,
    tag: str,
    tag_message: str | None = None,
    revision: str | None = None,
    token: bool | str | None = None,
    repo_type: str | None = None,
    exist_ok: bool = False,
) -> None:
    """
    Attach a tag to a given commit of a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository in which a commit will be tagged.
            Example: `"user/my-cool-model"`.

        tag (`str`):
            The name of the tag to create.

        tag_message (`str`, *optional*):
            The description of the tag to create.

        revision (`str`, *optional*):
            The git revision to tag. It can be a branch name or the OID/SHA of a
            commit, as a hexadecimal string. Shorthands (7 first characters) are
            also supported. Defaults to the head of the `"main"` branch.

        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if tagging a dataset or
            space, `None` or `"model"` if tagging a model. Default is
            `None`.

        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if tag already exists.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
        [`~utils.HfHubHTTPError`]:
            If the tag already exists on the repo (error 409) and `exist_ok` is
            set to `False`.
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    # The revision to tag is part of the URL path; the tag name itself travels in the JSON body.
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")

    # Prepare request
    tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}"
    headers = self._build_hf_headers(token=token)
    if tag_message is None:
        payload = {"tag": tag}
    else:
        payload = {"tag": tag, "message": tag_message}

    # Tag
    response = get_session().post(url=tag_url, headers=headers, json=payload)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        conflict = error.response.status_code == 409
        if conflict and exist_ok:
            return  # Tag already exists and the caller is fine with it
        raise
- @validate_hf_hub_args
def delete_tag(
    self,
    repo_id: str,
    *,
    tag: str,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> None:
    """
    Remove a tag from a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository in which a tag will be deleted.
            Example: `"user/my-cool-model"`.

        tag (`str`):
            The name of the tag to delete.

        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if un-tagging a dataset or space, `None` or
            `"model"` if un-tagging a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.RevisionNotFoundError`]:
            If tag is not found.
    """
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    # The tag name becomes a single URL path segment, hence `safe=""` (slashes are encoded too).
    encoded_tag = quote(tag, safe="")

    target_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/tag/{encoded_tag}"
    request_headers = self._build_hf_headers(token=token)

    # Un-tag and surface any HTTP error
    hf_raise_for_status(get_session().delete(url=target_url, headers=request_headers))
- @validate_hf_hub_args
def get_full_repo_name(
    self,
    model_id: str,
    *,
    organization: str | None = None,
    token: bool | str | None = None,
) -> str:
    """
    Returns the repository name for a given model ID and optional
    organization.

    Args:
        model_id (`str`):
            The name of the model. If it already contains a namespace
            (`"namespace/name"`) and no organization is passed, it is returned
            unchanged.
        organization (`str`, *optional*):
            If passed, the repository name will be in the organization
            namespace instead of the user namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`. Only used when the user
            namespace has to be looked up with `whoami`.

    Returns:
        `str`: The repository name in the user's namespace
        ({username}/{model_id}) if no organization is passed, and under the
        organization namespace ({organization}/{model_id}) otherwise.
    """
    if organization is not None:
        return f"{organization}/{model_id}"

    if "/" in model_id:
        # Already namespaced: prefixing the namespace again would produce the invalid
        # `"user/user/model"` instead of `"user/model"`.
        return model_id

    # Bare model name => resolve the namespace of the authenticated user.
    username = self.whoami(token=token)["name"]  # type: ignore
    return f"{username}/{model_id}"
- @validate_hf_hub_args
def get_repo_discussions(
    self,
    repo_id: str,
    *,
    author: str | None = None,
    discussion_type: constants.DiscussionTypeFilter | None = None,
    discussion_status: constants.DiscussionStatusFilter | None = None,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> Iterator[Discussion]:
    """
    Lazily iterates over the Discussions and Pull Requests of the given repo.

    Pages are requested one at a time, as the iterator is consumed. Because this is a generator, argument
    validation also happens on first iteration, not at call time.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        author (`str`, *optional*):
            Pass a value to filter by discussion author. `None` means no filter.
            Default is `None`.
        discussion_type (`str`, *optional*):
            Set to `"pull_request"` to fetch only pull requests, `"discussion"`
            to fetch only discussions. Set to `"all"` or `None` to fetch both.
            Default is `None`.
        discussion_status (`str`, *optional*):
            Set to `"open"` (respectively `"closed"`) to fetch only open
            (respectively closed) discussions. Set to `"all"` or `None`
            to fetch both.
            Default is `None`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if fetching from a dataset or
            space, `None` or `"model"` if fetching from a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterator[Discussion]`: An iterator of [`Discussion`] objects.

    Example:
        Collecting all discussions of a repo in a list:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased"))
        ```

        Iterating over discussions of a repo:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> for discussion in get_repo_discussions(repo_id="bert-base-uncased"):
        ...     print(discussion.num, discussion.title)
        ```
    """
    # --- Validate arguments ---
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if discussion_type is not None and discussion_type not in constants.DISCUSSION_TYPES:
        raise ValueError(f"Invalid discussion_type, must be one of {constants.DISCUSSION_TYPES}")
    if discussion_status is not None and discussion_status not in constants.DISCUSSION_STATUS:
        raise ValueError(f"Invalid discussion_status, must be one of {constants.DISCUSSION_STATUS}")

    # --- Prepare request ---
    headers = self._build_hf_headers(token=token)
    path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"

    # Only the filters that were actually provided are sent as query parameters.
    params: dict[str, str | int] = {}
    for query_key, query_value in (
        ("type", discussion_type),
        ("status", discussion_status),
        ("author", author),
    ):
        if query_value is not None:
            params[query_key] = query_value

    # --- Walk the pages ---
    page_index = 0
    while True:
        params["p"] = page_index
        resp = get_session().get(path, headers=headers, params=params)
        hf_raise_for_status(resp)
        page = resp.json()
        total = page["count"]
        start = page["start"]
        discussions = page["discussions"]
        # More pages remain as long as the items seen so far do not reach the reported total.
        more_pages = (start + len(discussions)) < total

        for item in discussions:
            yield Discussion(
                title=item["title"],
                num=item["num"],
                author=item.get("author", {}).get("name", "deleted"),
                created_at=parse_datetime(item["createdAt"]),
                status=item["status"],
                repo_id=item["repo"]["name"],
                repo_type=item["repo"]["type"],
                is_pull_request=item["isPullRequest"],
                endpoint=self.endpoint,
            )

        if not more_pages:
            break
        page_index += 1
- @validate_hf_hub_args
def get_discussion_details(
    self,
    repo_id: str,
    discussion_num: int,
    *,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> DiscussionWithDetails:
    """Retrieves the details of a single Discussion or Pull Request from the Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the discussion belongs to a dataset or
            space, `None` or `"model"` if it belongs to a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # --- Validate arguments ---
    if not isinstance(discussion_num, int) or discussion_num <= 0:
        raise ValueError("Invalid discussion_num, must be a positive integer")
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # --- Fetch details (the diff is requested along with them) ---
    path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}"
    headers = self._build_hf_headers(token=token)
    resp = get_session().get(path, params={"diff": "1"}, headers=headers)
    hf_raise_for_status(resp)
    details = resp.json()

    # --- Pull-Request-only fields stay `None` for plain discussions ---
    is_pull_request = details["isPullRequest"]
    if is_pull_request:
        target_branch = details["changes"]["base"]
        conflicting_files = details["filesWithConflicts"]
        merge_commit_oid = details["changes"].get("mergeCommitId", None)
    else:
        target_branch = None
        conflicting_files = None
        merge_commit_oid = None

    return DiscussionWithDetails(
        title=details["title"],
        num=details["num"],
        author=details.get("author", {}).get("name", "deleted"),
        created_at=parse_datetime(details["createdAt"]),
        status=details["status"],
        repo_id=details["repo"]["name"],
        repo_type=details["repo"]["type"],
        is_pull_request=details["isPullRequest"],
        events=[deserialize_event(raw_event) for raw_event in details["events"]],
        conflicting_files=conflicting_files,
        target_branch=target_branch,
        merge_commit_oid=merge_commit_oid,
        diff=details.get("diff"),
        endpoint=self.endpoint,
    )
- @validate_hf_hub_args
def create_discussion(
    self,
    repo_id: str,
    title: str,
    *,
    token: bool | str | None = None,
    description: str | None = None,
    repo_type: str | None = None,
    pull_request: bool = False,
) -> DiscussionWithDetails:
    """Opens a new Discussion or Pull Request on a repo.

    Pull Requests created programmatically will be in `"draft"` status.

    Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`].

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        title (`str`):
            The title of the discussion. It can be up to 200 characters long,
            and must be at least 3 characters long. Leading and trailing whitespaces
            will be stripped.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        description (`str`, *optional*):
            An optional description for the Discussion or Pull Request. Leading and trailing
            whitespaces are stripped. If missing or blank, a default text is used:
            `"Discussion opened with the huggingface_hub Python library"` (or
            `"Pull Request opened with ..."` when `pull_request=True`).
        pull_request (`bool`, *optional*):
            Whether to create a Pull Request or discussion. If `True`, creates a Pull Request.
            If `False`, creates a discussion. Defaults to `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access."""
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Normalize the description; a missing or blank one is replaced by a default text.
    if description is not None:
        description = description.strip()
    if not description:
        opened_kind = "Pull Request" if pull_request else "Discussion"
        description = (
            f"{opened_kind} opened with the"
            " [huggingface_hub Python"
            " library](https://huggingface.co/docs/huggingface_hub)"
        )

    headers = self._build_hf_headers(token=token)
    discussions_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"
    resp = get_session().post(
        discussions_url,
        json={
            "title": title.strip(),
            "description": description,
            "pullRequest": pull_request,
        },
        headers=headers,
    )
    hf_raise_for_status(resp)

    # The creation response only provides the number; fetch the full details with a second call.
    created_num = resp.json()["num"]
    return self.get_discussion_details(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=created_num,
        token=token,
    )
- @validate_hf_hub_args
def create_pull_request(
    self,
    repo_id: str,
    title: str,
    *,
    token: bool | str | None = None,
    description: str | None = None,
    repo_type: str | None = None,
) -> DiscussionWithDetails:
    """Opens a new Pull Request on a repo. Pull Requests created programmatically will be in `"draft"` status.

    Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`];

    This is a wrapper around [`HfApi.create_discussion`], called with `pull_request=True`.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        title (`str`):
            The title of the Pull Request. It can be up to 200 characters long,
            and must be at least 3 characters long. Leading and trailing whitespaces
            will be stripped.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        description (`str`, *optional*):
            An optional description for the Pull Request. See [`HfApi.create_discussion`]
            for the default used when it is omitted.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access."""
    # Everything is forwarded untouched; only the `pull_request` flag is forced on.
    pull_request_details = self.create_discussion(
        pull_request=True,
        repo_id=repo_id,
        repo_type=repo_type,
        title=title,
        description=description,
        token=token,
    )
    return pull_request_details
- def _post_discussion_changes(
- self,
- *,
- repo_id: str,
- discussion_num: int,
- resource: str,
- body: dict | None = None,
- token: bool | str | None = None,
- repo_type: str | None = None,
- ) -> httpx.Response:
- """Internal utility to POST changes to a Discussion or Pull Request"""
- if not isinstance(discussion_num, int) or discussion_num <= 0:
- raise ValueError("Invalid discussion_num, must be a positive integer")
- if repo_type not in constants.REPO_TYPES:
- raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
- if repo_type is None:
- repo_type = constants.REPO_TYPE_MODEL
- repo_id = f"{repo_type}s/{repo_id}"
- path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}"
- headers = self._build_hf_headers(token=token)
- resp = get_session().post(path, headers=headers, json=body)
- hf_raise_for_status(resp)
- return resp
@validate_hf_hub_args
def comment_discussion(
    self,
    repo_id: str,
    discussion_num: int,
    comment: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> DiscussionComment:
    """Posts a new comment on a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment (`str`):
            The content of the comment to create. Comments support markdown formatting.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionComment`]: the newly created comment

    Examples:
        ```python

        >>> comment = \"\"\"
        ... Hello @otheruser!
        ...
        ... # This is a title
        ...
        ... **This is bold**, *this is italic* and ~this is strikethrough~
        ... And [this](http://url) is a link
        ... \"\"\"

        >>> HfApi().comment_discussion(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     comment=comment,
        ... )
        # DiscussionComment(id='deadbeef0000000', type='comment', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    response = self._post_discussion_changes(
        resource="comment",
        body={"comment": comment},
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
    )
    # The created comment event is read from the "newMessage" key of the JSON response.
    created_event = response.json()["newMessage"]
    return deserialize_event(created_event)  # type: ignore
@validate_hf_hub_args
def rename_discussion(
    self,
    repo_id: str,
    discussion_num: int,
    new_title: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> DiscussionTitleChange:
    """Changes the title of a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        new_title (`str`):
            The new title for the discussion.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionTitleChange`]: the title change event

    Examples:
        ```python
        >>> new_title = "New title, fixing a typo"
        >>> HfApi().rename_discussion(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     new_title=new_title,
        ... )
        # DiscussionTitleChange(id='deadbeef0000000', type='title-change', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    response = self._post_discussion_changes(
        resource="title",
        body={"title": new_title},
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
    )
    # The title-change event is read from the "newTitle" key of the JSON response.
    title_event = response.json()["newTitle"]
    return deserialize_event(title_event)  # type: ignore
@validate_hf_hub_args
def change_discussion_status(
    self,
    repo_id: str,
    discussion_num: int,
    new_status: Literal["open", "closed"],
    *,
    token: bool | str | None = None,
    comment: str | None = None,
    repo_type: str | None = None,
) -> DiscussionStatusChange:
    """Closes or re-opens a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        new_status (`str`):
            The new status for the discussion, either `"open"` or `"closed"`.
        comment (`str`, *optional*):
            An optional comment to post with the status change. It is stripped before being
            sent, and left out of the request when empty or whitespace-only.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionStatusChange`]: the status change event

    Examples:
        ```python
        >>> HfApi().change_discussion_status(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     new_status="closed",
        ... )
        # DiscussionStatusChange(id='deadbeef0000000', type='status-change', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    allowed_statuses = ["open", "closed"]
    if new_status not in allowed_statuses:
        raise ValueError("Invalid status, valid statuses are: 'open' and 'closed'")

    payload: dict[str, str] = {"status": new_status}
    # Only attach a comment when something is left after stripping whitespace.
    if comment and comment.strip():
        payload["comment"] = comment.strip()

    response = self._post_discussion_changes(
        resource="status",
        body=payload,
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
    )
    # The status-change event is read from the "newStatus" key of the JSON response.
    status_event = response.json()["newStatus"]
    return deserialize_event(status_event)  # type: ignore
@validate_hf_hub_args
def merge_pull_request(
    self,
    repo_id: str,
    discussion_num: int,
    *,
    token: bool | str | None = None,
    comment: str | None = None,
    repo_type: str | None = None,
) -> None:
    """Merges a Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Pull Request. Must be a strictly positive integer.
        comment (`str`, *optional*):
            An optional comment to post with the merge. It is stripped before being sent;
            when it is `None`, empty or whitespace-only, no request body is sent.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `None`. The server response is checked for errors but is not parsed or returned.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Strip once; an empty result means "no comment" and therefore no JSON body at all.
    stripped_comment = comment.strip() if comment else ""
    self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="merge",
        body={"comment": stripped_comment} if stripped_comment else None,
    )
@validate_hf_hub_args
def edit_discussion_comment(
    self,
    repo_id: str,
    discussion_num: int,
    comment_id: str,
    new_content: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> DiscussionComment:
    """Replaces the content of an existing comment on a Discussion / Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment_id (`str`):
            The ID of the comment to edit. It is lower-cased before being placed in the URL.
        new_content (`str`):
            The new content of the comment. Comments support markdown formatting.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionComment`]: the edited comment

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    edit_resource = f"comment/{comment_id.lower()}/edit"
    response = self._post_discussion_changes(
        resource=edit_resource,
        body={"content": new_content},
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
    )
    # The edited comment event is read from the "updatedComment" key of the JSON response.
    updated_event = response.json()["updatedComment"]
    return deserialize_event(updated_event)  # type: ignore
@validate_hf_hub_args
def hide_discussion_comment(
    self,
    repo_id: str,
    discussion_num: int,
    comment_id: str,
    *,
    token: bool | str | None = None,
    repo_type: str | None = None,
) -> DiscussionComment:
    """Hides a comment on a Discussion / Pull Request.

    > [!WARNING]
    > Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.

    A `UserWarning` carrying the same message is emitted on every call, before the request is sent.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment_id (`str`):
            The ID of the comment to hide. It is lower-cased before being placed in the URL.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the repo is a dataset or a
            space, `None` or `"model"` if it is a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionComment`]: the hidden comment

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    irreversible_notice = "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible."
    warnings.warn(irreversible_notice, UserWarning)

    hide_resource = f"comment/{comment_id.lower()}/hide"
    response = self._post_discussion_changes(
        resource=hide_resource,
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
    )
    # The hidden comment event is read from the "updatedComment" key of the JSON response.
    hidden_event = response.json()["updatedComment"]
    return deserialize_event(hidden_event)  # type: ignore
@validate_hf_hub_args
def add_space_secret(
    self,
    repo_id: str,
    key: str,
    value: str,
    *,
    description: str | None = None,
    token: bool | str | None = None,
) -> None:
    """Adds or updates a secret in a Space.

    Secrets let you provide secret keys or tokens to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Secret key. Example: `"GITHUB_API_KEY"`
        value (`str`):
            Secret value. Example: `"your_github_api_key"`.
        description (`str`, *optional*):
            Secret description. Example: `"Github API key to access the Github API"`.
            Only included in the request when it is not `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
    """
    secret_payload = dict(key=key, value=value)
    if description is not None:
        secret_payload["description"] = description

    session = get_session()
    secrets_url = f"{self.endpoint}/api/spaces/{repo_id}/secrets"
    request_headers = self._build_hf_headers(token=token)
    response = session.post(secrets_url, headers=request_headers, json=secret_payload)
    hf_raise_for_status(response)
@validate_hf_hub_args
def delete_space_secret(self, repo_id: str, key: str, *, token: bool | str | None = None) -> None:
    """Deletes a secret from a Space.

    Secrets let you provide secret keys or tokens to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Secret key. Example: `"GITHUB_API_KEY"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
    """
    session = get_session()
    secrets_url = f"{self.endpoint}/api/spaces/{repo_id}/secrets"
    request_headers = self._build_hf_headers(token=token)
    # The key to delete travels in the JSON body of the DELETE request.
    response = session.request("DELETE", secrets_url, headers=request_headers, json={"key": key})
    hf_raise_for_status(response)
@validate_hf_hub_args
def get_space_variables(self, repo_id: str, *, token: bool | str | None = None) -> dict[str, SpaceVariable]:
    """Gets all variables from a Space.

    Variables let you set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to query. Example: `"bigcode/in-the-stack"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response, keyed
        by the entry's key.
    """
    session = get_session()
    variables_url = f"{self.endpoint}/api/spaces/{repo_id}/variables"
    response = session.get(variables_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    variables: dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def add_space_variable(
    self,
    repo_id: str,
    key: str,
    value: str,
    *,
    description: str | None = None,
    token: bool | str | None = None,
) -> dict[str, SpaceVariable]:
    """Adds or updates a variable in a Space.

    Variables let you set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`
        value (`str`):
            Variable value. Example: `"the_model_repo_id"`.
        description (`str`, *optional*):
            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
            Only included in the request when it is not `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response, keyed
        by the entry's key.
    """
    variable_payload = dict(key=key, value=value)
    if description is not None:
        variable_payload["description"] = description

    session = get_session()
    variables_url = f"{self.endpoint}/api/spaces/{repo_id}/variables"
    request_headers = self._build_hf_headers(token=token)
    response = session.post(variables_url, headers=request_headers, json=variable_payload)
    hf_raise_for_status(response)

    variables: dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def delete_space_variable(
    self, repo_id: str, key: str, *, token: bool | str | None = None
) -> dict[str, SpaceVariable]:
    """Deletes a variable from a Space.

    Variables let you set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response, keyed
        by the entry's key.
    """
    session = get_session()
    variables_url = f"{self.endpoint}/api/spaces/{repo_id}/variables"
    request_headers = self._build_hf_headers(token=token)
    # The key to delete travels in the JSON body of the DELETE request.
    response = session.request("DELETE", variables_url, headers=request_headers, json={"key": key})
    hf_raise_for_status(response)

    variables: dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def get_space_runtime(self, repo_id: str, *, token: bool | str | None = None) -> SpaceRuntime:
    """Gets runtime information about a Space.

    Args:
        repo_id (`str`):
            ID of the repo to query. Example: `"bigcode/in-the-stack"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.
    """
    session = get_session()
    runtime_url = f"{self.endpoint}/api/spaces/{repo_id}/runtime"
    response = session.get(runtime_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    runtime_info = response.json()
    return SpaceRuntime(runtime_info)
@validate_hf_hub_args
def request_space_hardware(
    self,
    repo_id: str,
    hardware: SpaceHardware,
    *,
    token: bool | str | None = None,
    sleep_time: int | None = None,
) -> SpaceRuntime:
    """Requests new hardware for a Space.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        hardware (`str` or [`SpaceHardware`]):
            Hardware on which to run the Space. Example: `"t4-medium"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity); a `UserWarning` is emitted if you pass a
            value together with `SpaceHardware.CPU_BASIC`, and the request is still sent.
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details.
    """
    has_custom_sleep_time = sleep_time is not None

    if has_custom_sleep_time and hardware == SpaceHardware.CPU_BASIC:
        not_configurable_notice = (
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware."
        )
        warnings.warn(not_configurable_notice, UserWarning)

    request_body: dict[str, Any] = {"flavor": hardware}
    if has_custom_sleep_time:
        request_body["sleepTimeSeconds"] = sleep_time

    session = get_session()
    hardware_url = f"{self.endpoint}/api/spaces/{repo_id}/hardware"
    request_headers = self._build_hf_headers(token=token)
    response = session.post(hardware_url, headers=request_headers, json=request_body)
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def set_space_sleep_time(self, repo_id: str, sleep_time: int, *, token: bool | str | None = None) -> SpaceRuntime:
    """Sets a custom sleep time for a Space running on upgraded hardware.

    Your Space will go to sleep after X seconds of inactivity. You are not billed when your Space is in "sleep"
    mode. If a new visitor lands on your Space, it will "wake it up". Only upgraded hardware can have a
    configurable sleep time. To know more about the sleep stage, please refer to
    https://huggingface.co/docs/hub/spaces-gpus#sleep-time.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        sleep_time (`int`):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to pause (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is also possible to set a custom sleep time when requesting hardware with [`request_space_hardware`].
    """
    session = get_session()
    sleeptime_url = f"{self.endpoint}/api/spaces/{repo_id}/sleeptime"
    request_headers = self._build_hf_headers(token=token)
    response = session.post(sleeptime_url, headers=request_headers, json={"seconds": sleep_time})
    hf_raise_for_status(response)
    space_runtime = SpaceRuntime(response.json())

    # The check runs after the request: the hardware is only known from the returned runtime.
    # The requested hardware takes precedence over the current one when it is set.
    effective_hardware = space_runtime.requested_hardware or space_runtime.hardware
    if effective_hardware == SpaceHardware.CPU_BASIC:
        not_configurable_notice = (
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware."
        )
        warnings.warn(not_configurable_notice, UserWarning)
    return space_runtime
@validate_hf_hub_args
def pause_space(self, repo_id: str, *, token: bool | str | None = None) -> SpaceRuntime:
    """Pauses your Space.

    A paused Space stops executing until manually restarted by its owner. This is different from the sleeping
    state in which free Spaces go after 48h of inactivity. Paused time is not billed to your account, no matter the
    hardware you've selected. To restart your Space, use [`restart_space`] or go to your Space settings page.

    For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).

    Args:
        repo_id (`str`):
            ID of the Space to pause. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    session = get_session()
    pause_url = f"{self.endpoint}/api/spaces/{repo_id}/pause"
    response = session.post(pause_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    runtime_info = response.json()
    return SpaceRuntime(runtime_info)
@validate_hf_hub_args
def enable_space_dev_mode(self, repo_id: str, *, token: bool | str | None = None) -> SpaceRuntime:
    """Enables dev mode on a Space.

    Spaces Dev Mode eases the debugging of your application and makes iterating on Spaces faster by allowing you
    to restart your application without stopping the Space container itself. This feature is available as part of
    a PRO or Team & Enterprise plan. See https://huggingface.co/docs/hub/spaces-dev-mode for more details.

    Args:
        repo_id (`str`):
            ID of the Space to enable dev mode. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can set dev mode. If you want to handle a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    session = get_session()
    dev_mode_url = f"{self.endpoint}/api/spaces/{repo_id}/dev-mode"
    request_headers = self._build_hf_headers(token=token)
    # Enabling and disabling share one endpoint; the "enabled" flag selects the direction.
    response = session.post(dev_mode_url, headers=request_headers, json={"enabled": True})
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def disable_space_dev_mode(
    self,
    repo_id: str,
    *,
    token: bool | str | None = None,
) -> SpaceRuntime:
    """Disables dev mode on a Space.

    Spaces Dev Mode eases the debugging of your application and makes iterating on Spaces faster by allowing you
    to restart your application without stopping the Space container itself. This feature is available as part of
    a PRO or Team & Enterprise plan. See https://huggingface.co/docs/hub/spaces-dev-mode for more details.

    Args:
        repo_id (`str`):
            ID of the Space to disable dev mode. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can set dev mode. If you want to handle a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    session = get_session()
    dev_mode_url = f"{self.endpoint}/api/spaces/{repo_id}/dev-mode"
    request_headers = self._build_hf_headers(token=token)
    # Enabling and disabling share one endpoint; the "enabled" flag selects the direction.
    response = session.post(dev_mode_url, headers=request_headers, json={"enabled": False})
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def restart_space(
    self, repo_id: str, *, token: bool | str | None = None, factory_reboot: bool = False
) -> SpaceRuntime:
    """Restarts your Space.

    This is the only way to programmatically restart a Space if you've put it on Pause (see [`pause_space`]). You
    must be the owner of the Space to restart it. If you are using an upgraded hardware, your account will be
    billed as soon as the Space is restarted. You can trigger a restart no matter the current state of a Space.

    For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).

    Args:
        repo_id (`str`):
            ID of the Space to restart. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        factory_reboot (`bool`, *optional*):
            If `True`, the Space will be rebuilt from scratch without caching any requirements.
            Sent to the server as the `factory=true` query parameter.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can restart it. If you want to restart a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    # No query parameters at all for a regular restart.
    query_params = {"factory": "true"} if factory_reboot else {}

    session = get_session()
    restart_url = f"{self.endpoint}/api/spaces/{repo_id}/restart"
    request_headers = self._build_hf_headers(token=token)
    response = session.post(restart_url, headers=request_headers, params=query_params)
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
def _stream_sse_events(
    self,
    *,
    url: str,
    log_label: str,
    timeout: int,
    follow: bool,
    token: bool | str | None = None,
    skip_previous_events_on_retry: bool = True,
    tolerated_status_codes: tuple[int, ...] = (),
    tolerated_exception_types: tuple[type[Exception], ...] = (),
    on_iteration_end: Callable[[], bool] | None = None,
) -> Iterable[dict[str, Any]]:
    """Stream Server-Sent Events from `url` and yield each JSON payload as a dict.

    Shared SSE loop with retry/backoff and event-index based de-duplication, used by
    Spaces logs and Jobs logs/metrics. Two retry styles are supported:

    - `on_iteration_end is None`: retries are the only backstop (Spaces).
    - `on_iteration_end` is set: it is polled after every iteration that did not end the
      loop; read timeouts and tolerated errors fall through to it instead of consuming
      retries (Jobs).

    Args:
        url (`str`):
            URL requested with a streaming `GET`.
        log_label (`str`):
            Human-readable label inserted in the retry warning logs.
        timeout (`int`):
            Timeout forwarded to the streaming request.
        follow (`bool`):
            If `True`, up to 5 retries are allowed. If `False`, no retry is allowed and a
            read timeout simply ends the stream.
        token (`bool` or `str`, *optional*):
            Forwarded to `self._build_hf_headers`.
        skip_previous_events_on_retry (`bool`, *optional*, defaults to `True`):
            If `True`, the number of events already yielded is remembered and that many
            leading events are skipped after a reconnection.
        tolerated_status_codes (`tuple[int, ...]`, *optional*):
            Non-200 status codes that must not raise; the loop proceeds to the
            `on_iteration_end` check instead.
        tolerated_exception_types (`tuple[type[Exception], ...]`, *optional*):
            Exception types (exact type match) that do not consume a retry. Only consulted
            when `on_iteration_end` is set, and only for non-timeout errors.
        on_iteration_end (`Callable[[], bool]`, *optional*):
            Callback invoked at the end of each iteration; returning `True` stops the loop.

    Yields:
        `dict[str, Any]`: the decoded JSON object of each `data: {...}` line.

    Raises:
        [`HfHubHTTPError`]:
            Re-raised immediately, never retried.
        `httpx.HTTPError` or `httpcore.TimeoutException`:
            Re-raised once the retry budget is exhausted (except for read timeouts in
            retry-only mode, which end the stream silently).
    """
    attempts = 0
    retry_budget = 5 if follow else 0
    backoff_floor = 1
    backoff_ceiling = 10
    delay = 0
    first_unseen_idx = 0
    pending_error: Exception | None = None

    while True:
        # A previous iteration scheduled a retry: log it, then back off before reconnecting.
        if pending_error is not None:
            logger.warning(f"'{pending_error}' thrown while requesting {log_label}")
            logger.warning(f"Retrying in {delay}s [Retry {attempts}/{retry_budget}].")
            pending_error = None
            time.sleep(delay)

        try:
            with get_session().stream(
                "GET",
                url,
                headers=self._build_hf_headers(token=token),
                timeout=timeout,
            ) as response:
                if response.status_code == 200:
                    seen_idx = -1
                    for raw_line in response.iter_lines():
                        # Only JSON-object events count; empty `data:` lines are ignored.
                        if not raw_line or not raw_line.startswith("data: {"):
                            continue
                        seen_idx += 1
                        if seen_idx < first_unseen_idx:
                            continue  # already yielded before a reconnection
                        if skip_previous_events_on_retry:
                            first_unseen_idx += 1
                        yield json.loads(raw_line[len("data: ") :])
                    # The server closed the stream normally: we are done.
                    break
                if response.status_code not in tolerated_status_codes:
                    hf_raise_for_status(response)
        except HfHubHTTPError:
            # Permanent HTTP error (404/403/...). Never retry — fail fast.
            raise
        except httpx.DecodingError:
            # Response ended prematurely.
            break
        except KeyboardInterrupt:
            break
        except (httpx.HTTPError, httpcore.TimeoutException) as err:
            silent_timeout = isinstance(err, (httpx.ReadTimeout, httpcore.ReadTimeout))
            if silent_timeout and not follow:
                break  # no-follow: timeout means the buffer is drained

            if on_iteration_end is not None:
                # Authoritative-state mode: read timeouts and tolerated errors are left to
                # the post-iteration check. A read timeout is handled here regardless of
                # `tolerated_exception_types`, which only matters for non-timeout errors.
                burns_retry = not (silent_timeout or type(err) in tolerated_exception_types)
            else:
                # Retry-only mode: every error in follow mode burns a retry.
                burns_retry = True

            if burns_retry:
                if attempts >= retry_budget:
                    if on_iteration_end is None and silent_timeout:
                        break  # follow mode, silent stream, retries exhausted: give up
                    raise
                attempts += 1
                delay = min(backoff_ceiling, max(backoff_floor, delay * 2))
                pending_error = err

        if on_iteration_end is not None and on_iteration_end():
            break
- def _fetch_space_logs_sse(
- self,
- *,
- repo_id: str,
- build: bool,
- timeout: int,
- follow: bool,
- token: bool | str | None = None,
- ) -> Iterable[dict[str, Any]]:
- log_type = "build" if build else "run"
- yield from self._stream_sse_events(
- url=f"{self.endpoint}/api/spaces/{repo_id}/logs/{log_type}",
- log_label=f"spaces /logs/{log_type} for repo_id={repo_id!r}",
- timeout=timeout,
- follow=follow,
- token=token,
- )
@validate_hf_hub_args
def fetch_space_logs(
    self,
    repo_id: str,
    *,
    build: bool = False,
    follow: bool = False,
    token: bool | str | None = None,
) -> Iterable[str]:
    """Fetch the run or build logs of a Space on the Hub.

    Handy to debug a Space that fails to build or crashes at runtime, in particular from a
    script or an agentic workflow where opening the logs in a browser is not possible.

    Args:
        repo_id (`str`):
            ID of the Space. Example: `"bigcode/in-the-stack"`.
        build (`bool`, *optional*, defaults to `False`):
            If `True`, fetch the container build logs (useful when a Space is stuck in
            `BUILD_ERROR`). If `False` (default), fetch the run logs, i.e. the stdout/stderr
            of the running application.
        follow (`bool`, *optional*, defaults to `False`):
            If `True`, stream logs in real-time (blocking) until the server closes the stream
            or `KeyboardInterrupt` is raised. If `False` (default), only the currently
            buffered logs are fetched and the call returns right after (non-blocking, like
            `docker logs`).
        token (`bool` or `str`, *optional*):
            A valid user access token. Defaults to the locally saved token, which is the
            recommended authentication method. Set to `False` to disable authentication.
            See https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `Iterable[str]`: A generator yielding log lines as they become available.

    Example:
    ```python
    >>> from huggingface_hub import fetch_space_logs

    >>> # Non-blocking: print currently available run logs and exit.
    >>> for line in fetch_space_logs("username/my-space"):
    ...     print(line, end="")

    >>> # Debug a build failure:
    >>> for line in fetch_space_logs("username/my-space", build=True):
    ...     print(line, end="")

    >>> # Stream run logs until the server closes the stream.
    >>> for line in fetch_space_logs("username/my-space", follow=True):
    ...     print(line, end="")
    ```
    """
    # Spaces /logs/{run|build} is an SSE endpoint emitting
    # `data: {"data": "...", "timestamp": "..."}` events. Keep-alives are empty `data:`
    # messages, which the streaming helper filters out. Without `follow`, a short read
    # timeout is used so the buffer is drained and the generator returns quickly.
    read_timeout = 120 if follow else 5
    sse_events = self._fetch_space_logs_sse(
        repo_id=repo_id,
        build=build,
        timeout=read_timeout,
        follow=follow,
        token=token,
    )
    for sse_event in sse_events:
        yield sse_event["data"]
@_deprecate_arguments(
    version="2.0",
    deprecated_args={"space_storage"},
    custom_message="Use `space_volumes` to mount volumes on a Space.",
)
@validate_hf_hub_args
def duplicate_repo(
    self,
    from_id: str,
    to_id: str | None = None,
    *,
    repo_type: str | None = None,
    private: bool | None = None,
    visibility: RepoVisibility_T | None = None,
    token: bool | str | None = None,
    exist_ok: bool = False,
    space_hardware: SpaceHardware | None = None,
    space_storage: SpaceStorage | None = None,
    space_sleep_time: int | None = None,
    space_secrets: list[dict[str, str]] | None = None,
    space_variables: list[dict[str, str]] | None = None,
    space_volumes: list[Volume] | None = None,
) -> RepoUrl:
    """Duplicate a repo on the Hub (model, dataset, or Space).

    The copy is performed server-side: full git history and LFS objects are preserved and no
    local download/upload round-trip is needed.

    Args:
        from_id (`str`):
            ID of the repo to duplicate. Example: `"openai/gdpval"`.
        to_id (`str`, *optional*):
            ID of the new repo. Example: `"myorg/my-gdpval"`. If not provided, the new repo
            keeps the name of the original repo and is created in your account.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if duplicating a dataset or Space, `None` or
            `"model"` if duplicating a model. Default is `None`.
        private (`bool`, *optional*):
            Whether the new repo should be private or not. Defaults to the same privacy as
            the original repo. Cannot be passed together with `visibility`.
        visibility (`Literal["public", "private", "protected"]`, *optional*):
            Visibility of the new repo. Can be `"public"` or `"private"`, or `"protected"`
            for Spaces. Defaults to the same visibility as the original repo.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists.
        space_hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware if repo_type is "space". Example: `"t4-medium"`. See
            [`SpaceHardware`] for a complete list.
        space_storage (`SpaceStorage` or `str`, *optional*):
            <Deprecated, use `space_volumes` instead> Choice of persistent storage tier if
            repo_type is "space". Example: `"small"`. See [`SpaceStorage`] for a complete list.
        space_sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep.
            Set to `-1` if you don't want your Space to sleep (default behavior for
            upgraded hardware). For free hardware, you can't configure the sleep time
            (value is fixed to 48 hours of inactivity). Only applicable if repo_type is "space".
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        space_secrets (`list[dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            Only applicable if repo_type is "space".
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        space_variables (`list[dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in
            the form `{"key": ..., "value": ..., "description": ...}` where description
            is optional. Only applicable if repo_type is "space".
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.
        space_volumes (`list[Volume]`, *optional*):
            A list of [`Volume`] objects to mount in the Space at duplication time. Each volume
            has a `type` (`"bucket"`, `"model"`, `"dataset"`, or `"space"`), a `source` (repo or
            bucket ID), a `mount_path` (path inside the container), and optional `revision`,
            `read_only`, and `path` fields. Only applicable if repo_type is "space".

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        `ValueError`:
            If `repo_type` is not one of the known repo types.
        [`~utils.RepositoryNotFoundError`]:
            If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`HfHubHTTPError`]:
            If the HuggingFace API returned an error

    Example:
    ```python
    >>> from huggingface_hub import duplicate_repo

    >>> # Duplicate a model to your account
    >>> duplicate_repo("google/gemma-7b")
    RepoUrl('https://huggingface.co/nateraw/gemma-7b',...)

    >>> # Duplicate a dataset with a custom name
    >>> duplicate_repo("openai/gdpval", to_id="myorg/my-gdpval", repo_type="dataset")
    RepoUrl('https://huggingface.co/datasets/myorg/my-gdpval',...)

    >>> # Duplicate a Space with custom hardware
    >>> duplicate_repo("multimodalart/dreambooth-training", repo_type="space", space_hardware="t4-medium")
    RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...)
    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")

    resolved_visibility = _resolve_repo_visibility(private=private, visibility=visibility, repo_type=repo_type)

    # Route segment of the duplicate endpoint for each supported repo type.
    route_by_type = {
        None: "models",
        constants.REPO_TYPE_MODEL: "models",
        constants.REPO_TYPE_DATASET: "datasets",
        constants.REPO_TYPE_SPACE: "spaces",
    }
    api_prefix = route_by_type[repo_type]

    # Work out the destination "<namespace>/<name>". The namespace falls back to the
    # authenticated user, the name falls back to the name of the source repo.
    if to_id is None:
        destination_namespace = self.whoami(token)["name"]
        destination_name = RepoUrl(from_id).repo_name
    else:
        destination = RepoUrl(to_id)
        if destination.namespace is not None:
            destination_namespace = destination.namespace
        else:
            destination_namespace = self.whoami(token)["name"]
        destination_name = destination.repo_name

    payload: dict[str, Any] = {"repository": f"{destination_namespace}/{destination_name}"}
    if resolved_visibility is not None:
        payload["visibility"] = resolved_visibility

    # Space-only options as (argument name, payload key, value) triples.
    serialized_volumes = [v.to_dict() for v in space_volumes] if space_volumes else None
    space_options: list[tuple[str, str, Any]] = [
        ("space_hardware", "hardware", space_hardware),
        ("space_storage", "storageTier", space_storage),
        ("space_sleep_time", "sleepTimeSeconds", space_sleep_time),
        ("space_secrets", "secrets", space_secrets),
        ("space_variables", "variables", space_variables),
        ("space_volumes", "volumes", serialized_volumes),
    ]

    if repo_type == "space":
        payload.update({key: value for _, key, value in space_options if value is not None})
        if space_sleep_time is not None and space_hardware == SpaceHardware.CPU_BASIC:
            warnings.warn(
                "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
                " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
                " you want to set a custom sleep time, you need to upgrade to a paid Hardware.",
                UserWarning,
            )
    else:
        # Space options make no sense for models/datasets: tell the user they are dropped.
        provided_space_args = [arg for arg, _, value in space_options if value is not None]
        if provided_space_args:
            warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.")

    r = get_session().post(
        f"{self.endpoint}/api/{api_prefix}/{from_id}/duplicate",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )

    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as err:
        # A 409 conflict is only acceptable when the caller opted in with `exist_ok`.
        if not (exist_ok and err.response.status_code == 409):
            raise

    return RepoUrl(r.json()["url"], endpoint=self.endpoint)
@_deprecate_method(version="2.0", message="Use `duplicate_repo` instead.")
@validate_hf_hub_args
def duplicate_space(
    self,
    from_id: str,
    to_id: str | None = None,
    *,
    private: bool | None = None,
    visibility: RepoVisibility_T | None = None,
    token: bool | str | None = None,
    exist_ok: bool = False,
    hardware: SpaceHardware | None = None,
    storage: SpaceStorage | None = None,
    sleep_time: int | None = None,
    secrets: list[dict[str, str]] | None = None,
    variables: list[dict[str, str]] | None = None,
) -> RepoUrl:
    """Duplicate a Space.

    Programmatically duplicate a Space. The new Space will be created in your account and will
    be in the same state as the original Space (running or paused). You can duplicate a Space
    no matter the current state of a Space.

    > [!WARNING]
    > `duplicate_space` is deprecated and will be removed in version 2.0. Use [`~HfApi.duplicate_repo`] instead.

    Args:
        from_id (`str`):
            ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`.
        to_id (`str`, *optional*):
            ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new
            Space will have the same name as the original Space, but in your account.
        private (`bool`, *optional*):
            Whether the new Space should be private or not. Defaults to the same privacy as the
            original Space. Cannot be passed together with `visibility`.
        visibility (`Literal["public", "private", "protected"]`, *optional*):
            Visibility of the new Space. Can be `"public"`, `"private"`, or `"protected"`.
            Defaults to the same visibility as the original Space.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists.
        hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware. Example: `"t4-medium"`. See [`SpaceHardware`] for a complete list.
        storage (`SpaceStorage` or `str`, *optional*):
            Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a
            complete list.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1`
            if you don't want your Space to sleep (default behavior for upgraded hardware). For
            free hardware, you can't configure the sleep time (value is fixed to 48 hours of
            inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        secrets (`list[dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        variables (`list[dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in the
            form `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`HfHubHTTPError`]:
            If the HuggingFace API returned an error

    Example:
    ```python
    >>> from huggingface_hub import duplicate_space

    >>> # Duplicate a Space to your account
    >>> duplicate_space("multimodalart/dreambooth-training")
    RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...)

    >>> # Can set custom destination id and visibility flag.
    >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", visibility="private")
    RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...)
    ```
    """
    # `to_id` is forwarded only when the caller provided one.
    destination_kwargs: dict[str, Any] = {} if to_id is None else {"to_id": to_id}
    return self.duplicate_repo(
        from_id=from_id,
        repo_type="space",
        private=private,
        visibility=visibility,
        token=token,
        exist_ok=exist_ok,
        space_hardware=hardware,
        space_storage=storage,
        space_sleep_time=sleep_time,
        space_secrets=secrets,
        space_variables=variables,
        **destination_kwargs,
    )
@_deprecate_method(version="2.0", message="Use `set_space_volumes` instead.")
@validate_hf_hub_args
def request_space_storage(
    self,
    repo_id: str,
    storage: SpaceStorage,
    *,
    token: bool | str | None = None,
) -> SpaceRuntime:
    """Request persistent storage for a Space.

    > [!WARNING]
    > `request_space_storage` is deprecated and will be removed in version 2.0. Use [`set_space_volumes`] instead.

    Sends a `POST` to `/api/spaces/{repo_id}/storage` with the requested tier.

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`.
        storage (`str` or [`SpaceStorage`]):
            Storage tier. Either 'small', 'medium', or 'large'.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.
    """
    storage_url = f"{self.endpoint}/api/spaces/{repo_id}/storage"
    response = get_session().post(
        storage_url,
        headers=self._build_hf_headers(token=token),
        json={"tier": storage},
    )
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@_deprecate_method(version="2.0", message="Use `delete_space_volumes` instead.")
@validate_hf_hub_args
def delete_space_storage(
    self,
    repo_id: str,
    *,
    token: bool | str | None = None,
) -> SpaceRuntime:
    """Delete persistent storage for a Space.

    > [!WARNING]
    > `delete_space_storage` is deprecated and will be removed in version 2.0. Use [`delete_space_volumes`] instead.

    Sends a `DELETE` to `/api/spaces/{repo_id}/storage`.

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    Raises:
        [`BadRequestError`]:
            If space has no persistent storage.
    """
    storage_url = f"{self.endpoint}/api/spaces/{repo_id}/storage"
    response = get_session().delete(storage_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def set_space_volumes(
    self,
    repo_id: str,
    volumes: list[Volume],
    *,
    token: bool | str | None = None,
) -> None:
    """Set volumes for a Space.

    Sets (or replaces) the list of volumes mounted in the Space. Each volume gives the Space's
    container access to a Hub resource (model, dataset, or storage bucket).

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"username/my-space"`.
        volumes (`list[Volume]`):
            List of [`Volume`] objects to mount. Each volume has a `type` (`"bucket"`, `"model"`,
            `"dataset"`, or `"space"`), a `source` (repo or bucket ID), a `mount_path` (path
            inside the container), and optional `revision`, `read_only`, and `path` fields.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`BadRequestError`]:
            If the Space is a static Space (volumes are not supported on static Spaces).

    Example:
    ```python
    >>> from huggingface_hub import HfApi, Volume
    >>> api = HfApi()
    >>> api.set_space_volumes(
    ...     "username/my-space",
    ...     volumes=[
    ...         Volume(type="model", source="username/my-model", mount_path="/models", read_only=True),
    ...         Volume(type="bucket", source="username/my-bucket", mount_path="/data"),
    ...     ],
    ... )
    ```
    """
    # Serialize every volume before building the request body.
    serialized = []
    for volume in volumes:
        serialized.append(volume.to_dict())

    volumes_url = f"{self.endpoint}/api/spaces/{repo_id}/volumes"
    response = get_session().put(
        volumes_url,
        headers=self._build_hf_headers(token=token),
        json={"volumes": serialized},
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def delete_space_volumes(
    self,
    repo_id: str,
    *,
    token: bool | str | None = None,
) -> None:
    """Remove all volumes from a Space.

    Sends a `DELETE` to `/api/spaces/{repo_id}/volumes`.

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"username/my-space"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`BadRequestError`]:
            If the Space has no volumes attached.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.delete_space_volumes("username/my-space")
    ```
    """
    volumes_url = f"{self.endpoint}/api/spaces/{repo_id}/volumes"
    response = get_session().delete(volumes_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
- #######################
- # Inference Endpoints #
- #######################
def list_inference_endpoints(
    self, namespace: str | None = None, *, token: bool | str | None = None
) -> list[InferenceEndpoint]:
    """Lists all inference endpoints for the given namespace.

    Args:
        namespace (`str`, *optional*):
            The namespace to list endpoints for. Defaults to the current user. Set to `"*"` to list all endpoints
            from all namespaces (i.e. personal namespace and all orgs the user belongs to).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        list[`InferenceEndpoint`]: A list of all inference endpoints for the given namespace.

    Raises:
        [`HfHubHTTPError`]:
            If the request for a specific namespace (or for the personal namespace when
            `namespace="*"`) fails. With `namespace="*"`, errors raised while listing an
            organization are logged and that organization is skipped.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_inference_endpoints()
    [InferenceEndpoint(name='my-endpoint', ...), ...]
    ```
    """
    # Special case: list all endpoints for all namespaces the user has access to
    if namespace == "*":
        user = self.whoami(token=token)

        # List personal endpoints first. Go through `self` (not the module-level helper bound
        # to the default client) and forward `token`, so that this instance's configuration
        # and the caller's credentials are used for every request.
        endpoints: list[InferenceEndpoint] = list(
            self.list_inference_endpoints(namespace=self._get_namespace(token=token), token=token)
        )

        # Then list endpoints for all orgs the user belongs to. Listing is best-effort: a
        # failing org never aborts the whole call.
        for org in user.get("orgs", []):
            try:
                endpoints += self.list_inference_endpoints(namespace=org["name"], token=token)
            except HfHubHTTPError as error:
                if error.response.status_code == 401:  # Either no billing or user doesn't have access
                    logger.debug("Cannot list Inference Endpoints for org '%s': %s", org["name"], error)
                else:
                    # Still skipped, but make unexpected failures visible.
                    logger.warning("Cannot list Inference Endpoints for org '%s': %s", org["name"], error)

        return endpoints

    # Normal case: list endpoints for a specific namespace
    namespace = namespace or self._get_namespace(token=token)

    response = get_session().get(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    return [
        InferenceEndpoint.from_raw(endpoint, namespace=namespace, token=token)
        for endpoint in response.json()["items"]
    ]
- def create_inference_endpoint(
- self,
- name: str,
- *,
- repository: str,
- framework: str,
- accelerator: str,
- instance_size: str,
- instance_type: str,
- region: str,
- vendor: str,
- account_id: str | None = None,
- min_replica: int = 1,
- max_replica: int = 1,
- scaling_metric: InferenceEndpointScalingMetric | None = None,
- scaling_threshold: float | None = None,
- scale_to_zero_timeout: int | None = None,
- revision: str | None = None,
- task: str | None = None,
- custom_image: dict | None = None,
- env: dict[str, str] | None = None,
- secrets: dict[str, str] | None = None,
- type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
- domain: str | None = None,
- path: str | None = None,
- cache_http_responses: bool | None = None,
- tags: list[str] | None = None,
- namespace: str | None = None,
- token: bool | str | None = None,
- ) -> InferenceEndpoint:
- """Create a new Inference Endpoint.
- Args:
- name (`str`):
- The unique name for the new Inference Endpoint.
- repository (`str`):
- The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
- framework (`str`):
- The machine learning framework used for the model (e.g. `"custom"`).
- accelerator (`str`):
- The hardware accelerator to be used for inference (e.g. `"cpu"`).
- instance_size (`str`):
- The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
- instance_type (`str`):
- The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
- region (`str`):
- The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`).
- vendor (`str`):
- The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`).
- account_id (`str`, *optional*):
- The account ID used to link a VPC to a private Inference Endpoint (if applicable).
- min_replica (`int`, *optional*):
- The minimum number of replicas (instances) to keep running for the Inference Endpoint. To enable
- scaling to zero, set this value to 0 and adjust `scale_to_zero_timeout` accordingly. Defaults to 1.
- max_replica (`int`, *optional*):
- The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1.
- scaling_metric (`str` or [`InferenceEndpointScalingMetric `], *optional*):
- The metric reference for scaling. Either "pendingRequests" or "hardwareUsage" when provided. Defaults to
- None (meaning: let the HF Endpoints service specify the metric).
- scaling_threshold (`float`, *optional*):
- The scaling metric threshold used to trigger a scale up. Ignored when scaling metric is not provided.
- Defaults to None (meaning: let the HF Endpoints service specify the threshold).
- scale_to_zero_timeout (`int`, *optional*):
- The duration in minutes before an inactive endpoint is scaled to zero, or no scaling to zero if
- set to None and `min_replica` is not 0. Defaults to None.
- revision (`str`, *optional*):
- The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
- task (`str`, *optional*):
- The task on which to deploy the model (e.g. `"text-classification"`).
- custom_image (`dict`, *optional*):
- A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
- Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
- env (`dict[str, str]`, *optional*):
- Non-secret environment variables to inject in the container environment.
- secrets (`dict[str, str]`, *optional*):
- Secret values to inject in the container environment.
- type ([`InferenceEndpointType]`, *optional*):
- The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`.
- domain (`str`, *optional*):
- The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
- path (`str`, *optional*):
- The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
- cache_http_responses (`bool`, *optional*):
- Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`.
- tags (`list[str]`, *optional*):
- A list of tags to associate with the Inference Endpoint.
- namespace (`str`, *optional*):
- The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- Returns:
- [`InferenceEndpoint`]: information about the updated Inference Endpoint.
- Example:
- ```python
- >>> from huggingface_hub import HfApi
- >>> api = HfApi()
- >>> endpoint = api.create_inference_endpoint(
- ... "my-endpoint-name",
- ... repository="gpt2",
- ... framework="pytorch",
- ... task="text-generation",
- ... accelerator="cpu",
- ... vendor="aws",
- ... region="us-east-1",
- ... type="protected",
- ... instance_size="x2",
- ... instance_type="intel-icl",
- ... )
- >>> endpoint
- InferenceEndpoint(name='my-endpoint-name', status="pending",...)
- # Run inference on the endpoint
- >>> endpoint.client.text_generation(...)
- "..."
- ```
- ```python
- # Start an Inference Endpoint running Zephyr-7b-beta on TGI
- >>> from huggingface_hub import HfApi
- >>> api = HfApi()
- >>> endpoint = api.create_inference_endpoint(
- ... "aws-zephyr-7b-beta-0486",
- ... repository="HuggingFaceH4/zephyr-7b-beta",
- ... framework="pytorch",
- ... task="text-generation",
- ... accelerator="gpu",
- ... vendor="aws",
- ... region="us-east-1",
- ... type="protected",
- ... instance_size="x1",
- ... instance_type="nvidia-a10g",
- ... env={
- ... "MAX_BATCH_PREFILL_TOKENS": "2048",
- ... "MAX_INPUT_LENGTH": "1024",
- ... "MAX_TOTAL_TOKENS": "1512",
- ... "MODEL_ID": "/repository"
- ... },
- ... custom_image={
- ... "health_route": "/health",
- ... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
- ... },
- ... secrets={"MY_SECRET_KEY": "secret_value"},
- ... tags=["dev", "text-generation"],
- ... )
- ```
- ```python
- # Start an Inference Endpoint running ProsusAI/finbert while scaling to zero in 15 minutes
- >>> from huggingface_hub import HfApi
- >>> api = HfApi()
- >>> endpoint = api.create_inference_endpoint(
- ... "finbert-classifier",
- ... repository="ProsusAI/finbert",
- ... framework="pytorch",
- ... task="text-classification",
- ... min_replica=0,
- ... scale_to_zero_timeout=15,
- ... accelerator="cpu",
- ... vendor="aws",
- ... region="us-east-1",
- ... type="protected",
- ... instance_size="x2",
- ... instance_type="intel-icl",
- ... )
- >>> endpoint.wait(timeout=300)
- # Run inference on the endpoint
- >>> endpoint.client.text_classification(...)
- TextClassificationOutputElement(label='positive', score=0.8983615040779114)
- ```
- """
- namespace = namespace or self._get_namespace(token=token)
- if custom_image is not None:
- image = (
- custom_image
- if next(iter(custom_image)) in constants.INFERENCE_ENDPOINT_IMAGE_KEYS
- else {"custom": custom_image}
- )
- else:
- image = {"huggingface": {}}
- payload: dict = {
- "accountId": account_id,
- "compute": {
- "accelerator": accelerator,
- "instanceSize": instance_size,
- "instanceType": instance_type,
- "scaling": {
- "maxReplica": max_replica,
- "minReplica": min_replica,
- "scaleToZeroTimeout": scale_to_zero_timeout,
- },
- },
- "model": {
- "framework": framework,
- "repository": repository,
- "revision": revision,
- "task": task,
- "image": image,
- },
- "name": name,
- "provider": {
- "region": region,
- "vendor": vendor,
- },
- "type": type,
- }
- if scaling_metric:
- payload["compute"]["scaling"]["measure"] = {scaling_metric: scaling_threshold} # type: ignore
- if env:
- payload["model"]["env"] = env
- if secrets:
- payload["model"]["secrets"] = secrets
- if domain is not None or path is not None:
- payload["route"] = {}
- if domain is not None:
- payload["route"]["domain"] = domain
- if path is not None:
- payload["route"]["path"] = path
- if cache_http_responses is not None:
- payload["cacheHttpResponses"] = cache_http_responses
- if tags is not None:
- payload["tags"] = tags
- response = get_session().post(
- f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
- headers=self._build_hf_headers(token=token),
- json=payload,
- )
- hf_raise_for_status(response)
- return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
@experimental
@validate_hf_hub_args
def create_inference_endpoint_from_catalog(
    self,
    repo_id: str,
    *,
    name: str | None = None,
    accelerator: Literal["cpu", "gpu", "neuron"] | str | None = None,
    token: bool | str | None = None,
    namespace: str | None = None,
) -> InferenceEndpoint:
    """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog.

    The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
    and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
    of available models in the catalog.

    Args:
        repo_id (`str`):
            The ID of the model in the catalog to deploy as an Inference Endpoint.
        name (`str`, *optional*):
            The unique name for the new Inference Endpoint. If not provided, a random name will be generated.
        accelerator (`str`, *optional*):
            The hardware accelerator to be used for inference. Possible values include `"cpu"`, `"gpu"`, and
            `"neuron"`. If not provided, the server will use a default appropriate for the model.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.

    Returns:
        [`InferenceEndpoint`]: information about the new Inference Endpoint.

    > [!WARNING]
    > `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    > if you have any suggestions or requests.
    """
    token = token or self.token or get_token()

    # Resolve the namespace once so that the deploy request and the returned object agree on it.
    namespace = namespace or self._get_namespace(token=token)

    payload: dict = {
        "namespace": namespace,
        "repoId": repo_id,
    }
    # Optional fields are only sent when explicitly provided.
    if name is not None:
        payload["endpointName"] = name
    if accelerator is not None:
        payload["accelerator"] = accelerator

    response = get_session().post(
        f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)

    data = response.json()["endpoint"]
    # The object must carry the namespace it was deployed to (not the endpoint name), like every
    # other method returning an `InferenceEndpoint` in this class.
    return InferenceEndpoint.from_raw(data, namespace=namespace, token=token)
@experimental
@validate_hf_hub_args
def list_inference_catalog(self, *, token: bool | str | None = None) -> list[str]:
    """Return the model IDs currently offered by the Hugging Face Inference Catalog.

    The Inference Catalog is a curated list of models that are optimized for inference and for which default
    configurations have been tested. Browse it at https://endpoints.huggingface.co/catalog.

    Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).

    Returns:
        List[`str`]: A list of model IDs available in the catalog.

    > [!WARNING]
    > `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    > if you have any suggestions or requests.
    """
    session = get_session()
    catalog_url = f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list"
    response = session.get(catalog_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    # The server wraps the list of model IDs under the "models" key.
    body = response.json()
    return body["models"]
def get_inference_endpoint(
    self,
    name: str,
    *,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> InferenceEndpoint:
    """Fetch the current state of an Inference Endpoint.

    Args:
        name (`str`):
            The name of the Inference Endpoint to retrieve information about.
        namespace (`str`, *optional*):
            The namespace in which the Inference Endpoint is located. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the requested Inference Endpoint.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> endpoint = api.get_inference_endpoint("my-text-to-image")
    >>> endpoint
    InferenceEndpoint(name='my-text-to-image', ...)

    # Get status
    >>> endpoint.status
    'running'
    >>> endpoint.url
    'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'

    # Run inference
    >>> endpoint.client.text_to_image(...)
    ```
    """
    # Fall back to the namespace of the authenticated user when none is given.
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    endpoint_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}"
    response = session.get(endpoint_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw = response.json()
    return InferenceEndpoint.from_raw(raw, namespace=namespace, token=token)
def update_inference_endpoint(
    self,
    name: str,
    *,
    # Compute update
    accelerator: str | None = None,
    instance_size: str | None = None,
    instance_type: str | None = None,
    min_replica: int | None = None,
    max_replica: int | None = None,
    scale_to_zero_timeout: int | None = None,
    scaling_metric: InferenceEndpointScalingMetric | None = None,
    scaling_threshold: float | None = None,
    # Model update
    repository: str | None = None,
    framework: str | None = None,
    revision: str | None = None,
    task: str | None = None,
    custom_image: dict | None = None,
    env: dict[str, str] | None = None,
    secrets: dict[str, str] | None = None,
    # Route update
    domain: str | None = None,
    path: str | None = None,
    # Other
    cache_http_responses: bool | None = None,
    tags: list[str] | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> InferenceEndpoint:
    """Update an Inference Endpoint.

    This method allows the update of either the compute configuration, the deployed model, the route, or any combination.
    All arguments are optional but at least one must be provided.

    For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to update.

        accelerator (`str`, *optional*):
            The hardware accelerator to be used for inference (e.g. `"cpu"`).
        instance_size (`str`, *optional*):
            The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
        instance_type (`str`, *optional*):
            The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
        min_replica (`int`, *optional*):
            The minimum number of replicas (instances) to keep running for the Inference Endpoint.
        max_replica (`int`, *optional*):
            The maximum number of replicas (instances) to scale to for the Inference Endpoint.
        scale_to_zero_timeout (`int`, *optional*):
            The duration in minutes before an inactive endpoint is scaled to zero.
        scaling_metric (`str` or [`InferenceEndpointScalingMetric`], *optional*):
            The metric reference for scaling. Either "pendingRequests" or "hardwareUsage" when provided.
            Defaults to None.
        scaling_threshold (`float`, *optional*):
            The scaling metric threshold used to trigger a scale up. Ignored when scaling metric is not provided.
            Defaults to None.

        repository (`str`, *optional*):
            The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
        framework (`str`, *optional*):
            The machine learning framework used for the model (e.g. `"custom"`).
        revision (`str`, *optional*):
            The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
        task (`str`, *optional*):
            The task on which to deploy the model (e.g. `"text-classification"`).
        custom_image (`dict`, *optional*):
            A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
            Inference Endpoint running on the `text-generation-inference` (TGI) framework. A dict whose first key
            is one of `constants.INFERENCE_ENDPOINT_IMAGE_KEYS` is sent as-is; any other dict is sent under the
            `"custom"` key (same rule as [`create_inference_endpoint`]).
        env (`dict[str, str]`, *optional*):
            Non-secret environment variables to inject in the container environment.
        secrets (`dict[str, str]`, *optional*):
            Secret values to inject in the container environment.

        domain (`str`, *optional*):
            The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
        path (`str`, *optional*):
            The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).

        cache_http_responses (`bool`, *optional*):
            Whether to cache HTTP responses from the Inference Endpoint.
        tags (`list[str]`, *optional*):
            A list of tags to associate with the Inference Endpoint.

        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the updated Inference Endpoint.
    """
    namespace = namespace or self._get_namespace(token=token)

    # Populate only the fields that are not None. The nested defaultdict creates the
    # "compute" / "model" / "route" sections (and "compute.scaling") lazily on first write.
    payload: dict = defaultdict(lambda: defaultdict(dict))

    # Compute section
    if accelerator is not None:
        payload["compute"]["accelerator"] = accelerator
    if instance_size is not None:
        payload["compute"]["instanceSize"] = instance_size
    if instance_type is not None:
        payload["compute"]["instanceType"] = instance_type
    if max_replica is not None:
        payload["compute"]["scaling"]["maxReplica"] = max_replica
    if min_replica is not None:
        payload["compute"]["scaling"]["minReplica"] = min_replica
    if scale_to_zero_timeout is not None:
        payload["compute"]["scaling"]["scaleToZeroTimeout"] = scale_to_zero_timeout
    if scaling_metric:
        # The threshold is only meaningful together with a metric.
        payload["compute"]["scaling"]["measure"] = {scaling_metric: scaling_threshold}

    # Model section
    if repository is not None:
        payload["model"]["repository"] = repository
    if framework is not None:
        payload["model"]["framework"] = framework
    if revision is not None:
        payload["model"]["revision"] = revision
    if task is not None:
        payload["model"]["task"] = task
    if custom_image is not None:
        # Same convention as `create_inference_endpoint`: an image dict that is already keyed by a
        # known image type is sent untouched, anything else is treated as a raw custom image.
        # `next(..., None)` keeps an empty dict on the "custom" path instead of raising.
        first_key = next(iter(custom_image), None)
        if first_key in constants.INFERENCE_ENDPOINT_IMAGE_KEYS:
            payload["model"]["image"] = custom_image
        else:
            payload["model"]["image"] = {"custom": custom_image}
    if env is not None:
        payload["model"]["env"] = env
    if secrets is not None:
        payload["model"]["secrets"] = secrets

    # Route section
    if domain is not None:
        payload["route"]["domain"] = domain
    if path is not None:
        payload["route"]["path"] = path

    # Top-level fields
    if cache_http_responses is not None:
        payload["cacheHttpResponses"] = cache_http_responses
    if tags is not None:
        payload["tags"] = tags

    response = get_session().put(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)

    return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
def delete_inference_endpoint(
    self,
    name: str,
    *,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> None:
    """Permanently delete an Inference Endpoint.

    This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable
    to pause it with [`pause_inference_endpoint`] or scale it to zero with [`scale_to_zero_inference_endpoint`].

    For convenience, you can also delete an Inference Endpoint using [`InferenceEndpoint.delete`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to delete.
        namespace (`str`, *optional*):
            The namespace in which the Inference Endpoint is located. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
    """
    # Fall back to the namespace of the authenticated user when none is given.
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    endpoint_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}"
    response = session.delete(endpoint_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
def pause_inference_endpoint(
    self,
    name: str,
    *,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> InferenceEndpoint:
    """Pause an Inference Endpoint.

    A paused Inference Endpoint will not be charged. It can be resumed at any time using [`resume_inference_endpoint`].
    This is different than scaling the Inference Endpoint to zero with [`scale_to_zero_inference_endpoint`], which
    would be automatically restarted when a request is made to it.

    For convenience, you can also pause an Inference Endpoint using [`InferenceEndpoint.pause`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to pause.
        namespace (`str`, *optional*):
            The namespace in which the Inference Endpoint is located. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the paused Inference Endpoint.
    """
    # Fall back to the namespace of the authenticated user when none is given.
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    pause_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/pause"
    response = session.post(pause_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw = response.json()
    return InferenceEndpoint.from_raw(raw, namespace=namespace, token=token)
def resume_inference_endpoint(
    self,
    name: str,
    *,
    namespace: str | None = None,
    running_ok: bool = True,
    token: bool | str | None = None,
) -> InferenceEndpoint:
    """Resume a paused Inference Endpoint.

    For convenience, you can also resume an Inference Endpoint using [`InferenceEndpoint.resume`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to resume.
        namespace (`str`, *optional*):
            The namespace in which the Inference Endpoint is located. Defaults to the current user.
        running_ok (`bool`, *optional*):
            If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to
            `True`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the resumed Inference Endpoint.
    """
    # Fall back to the namespace of the authenticated user when none is given.
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    resume_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/resume"
    response = session.post(resume_url, headers=self._build_hf_headers(token=token))

    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        # An HTTP 400 mentioning "already running" is tolerated when the caller allows it.
        tolerated = (
            running_ok and error.response.status_code == 400 and "already running" in error.response.text
        )
        if not tolerated:
            raise
        # Already running: fetch the current state instead of failing.
        return self.get_inference_endpoint(name, namespace=namespace, token=token)

    return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
def scale_to_zero_inference_endpoint(
    self,
    name: str,
    *,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> InferenceEndpoint:
    """Scale Inference Endpoint to zero.

    An Inference Endpoint scaled to zero will not be charged. It will be resumed on the next request to it, with a
    cold start delay. This is different than pausing the Inference Endpoint with [`pause_inference_endpoint`], which
    would require a manual resume with [`resume_inference_endpoint`].

    For convenience, you can also scale an Inference Endpoint to zero using [`InferenceEndpoint.scale_to_zero`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to scale to zero.
        namespace (`str`, *optional*):
            The namespace in which the Inference Endpoint is located. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the scaled-to-zero Inference Endpoint.
    """
    # Fall back to the namespace of the authenticated user when none is given.
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    scale_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/scale-to-zero"
    response = session.post(scale_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw = response.json()
    return InferenceEndpoint.from_raw(raw, namespace=namespace, token=token)
- def _get_namespace(self, token: bool | str | None = None) -> str:
- """Get the default namespace for the current user."""
- me = self.whoami(token=token)
- if me["type"] == "user":
- return me["name"]
- else:
- raise ValueError(
- "Cannot determine default namespace. You must provide a 'namespace' as input or be logged in as a"
- " user."
- )
- ########################
- # Collection Endpoints #
- ########################
@validate_hf_hub_args
def list_collections(
    self,
    *,
    owner: list[str] | str | None = None,
    item: list[str] | str | None = None,
    sort: CollectionSort_T | None = None,
    limit: int | None = None,
    token: bool | str | None = None,
) -> Iterable[Collection]:
    """List collections on the Huggingface Hub, given some filters.

    > [!WARNING]
    > When listing collections, the item list per collection is truncated to 4 items maximum. To retrieve all items
    > from a collection, you must use [`get_collection`].

    Args:
        owner (`list[str]` or `str`, *optional*):
            Filter by owner's username.
        item (`list[str]` or `str`, *optional*):
            Filter collections containing particular items. Example: `"models/teknium/OpenHermes-2.5-Mistral-7B"`, `"datasets/squad"` or `"papers/2311.12983"`.
        sort (`Literal["lastModified", "trending", "upvotes"]`, *optional*):
            Sort collections by last modified, trending or upvotes.
        limit (`int`, *optional*):
            Maximum number of collections to be returned.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[Collection]`: an iterable of [`Collection`] objects.
    """
    # Construct the API endpoint
    path = f"{self.endpoint}/api/collections"
    headers = self._build_hf_headers(token=token)

    # Only send the filters that were actually provided.
    filters = {"owner": owner, "item": item, "sort": sort, "limit": limit}
    params: dict = {key: value for key, value in filters.items() if value is not None}

    # Paginate over the results until limit is reached
    items = paginate(path, headers=headers, params=params)
    if limit is not None:
        items = islice(items, limit)  # Do not iterate over all pages

    # Parse as Collection and return. The endpoint is forwarded like in the other collection
    # methods of this class, so the objects are tied to the Hub instance that was queried.
    for position, collection_data in enumerate(items):
        yield Collection(position=position, **{**collection_data, "endpoint": self.endpoint})
def get_collection(
    self,
    collection_slug: str,
    *,
    token: bool | str | None = None,
) -> Collection:
    """Retrieve a Collection from the Hub.

    Args:
        collection_slug (`str`):
            Slug of the collection of the Hub. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import get_collection
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")
    >>> collection.title
    'Recent models'
    >>> len(collection.items)
    37
    >>> collection.items[0]
    CollectionItem(
        item_object_id='651446103cd773a050bf64c2',
        item_id='TheBloke/U-Amethyst-20B-AWQ',
        item_type='model',
        position=88,
        note=None
    )
    ```
    """
    session = get_session()
    collection_url = f"{self.endpoint}/api/collections/{collection_slug}"
    r = session.get(collection_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(r)

    # Attach the endpoint this client talks to on top of the server payload.
    return Collection(**dict(r.json(), endpoint=self.endpoint))
def create_collection(
    self,
    title: str,
    *,
    namespace: str | None = None,
    description: str | None = None,
    private: bool = False,
    exists_ok: bool = False,
    token: bool | str | None = None,
) -> Collection:
    """Create a new Collection on the Hub.

    Args:
        title (`str`):
            Title of the collection to create. Example: `"Recent models"`.
        namespace (`str`, *optional*):
            Namespace of the collection to create (username or org). Will default to the owner name.
        description (`str`, *optional*):
            Description of the collection to create.
        private (`bool`, *optional*):
            Whether the collection should be private or not. Defaults to `False` (i.e. public collection).
        exists_ok (`bool`, *optional*):
            If `True`, do not raise an error if collection already exists.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import create_collection
    >>> collection = create_collection(
    ...     title="ICCV 2023",
    ...     description="Portfolio of models, papers and demos I presented at ICCV 2023",
    ... )
    >>> collection.slug
    "username/iccv-2023-64f9a55bb3115b4f513ec026"
    ```
    """
    # Default to the name of the authenticated account when no namespace is given.
    owner = namespace if namespace is not None else self.whoami(token)["name"]

    payload = {"title": title, "namespace": owner, "private": private}
    if description is not None:
        payload["description"] = description

    session = get_session()
    r = session.post(
        f"{self.endpoint}/api/collections",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as err:
        if not (exists_ok and err.response.status_code == 409):
            raise
        # Collection already exists and `exists_ok=True`: the 409 body carries the existing slug.
        existing_slug = r.json()["slug"]
        return self.get_collection(existing_slug, token=token)

    return Collection(**dict(r.json(), endpoint=self.endpoint))
def update_collection_metadata(
    self,
    collection_slug: str,
    *,
    title: str | None = None,
    description: str | None = None,
    position: int | None = None,
    private: bool | None = None,
    theme: str | None = None,
    token: bool | str | None = None,
) -> Collection:
    """Update metadata of a collection on the Hub.

    All arguments are optional. Only provided metadata will be updated.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        title (`str`, *optional*):
            Title of the collection to update.
        description (`str`, *optional*):
            Description of the collection to update.
        position (`int`, *optional*):
            New position of the collection in the list of collections of the user.
        private (`bool`, *optional*):
            Whether the collection should be private or not.
        theme (`str`, *optional*):
            Theme of the collection on the Hub.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import update_collection_metadata
    >>> collection = update_collection_metadata(
    ...     collection_slug="username/iccv-2023-64f9a55bb3115b4f513ec026",
    ...     title="ICCV Oct. 2023",
    ...     description="Portfolio of models, datasets, papers and demos I presented at ICCV Oct. 2023",
    ...     private=False,
    ...     theme="pink",
    ... )
    >>> collection.slug
    "username/iccv-oct-2023-64f9a55bb3115b4f513ec026"
    # ^collection slug got updated but not the trailing ID
    ```
    """
    candidates = (
        ("position", position),
        ("private", private),
        ("theme", theme),
        ("title", title),
        ("description", description),
    )
    # Only send not-none values to the API
    changes = {field: value for field, value in candidates if value is not None}

    session = get_session()
    r = session.patch(
        f"{self.endpoint}/api/collections/{collection_slug}",
        headers=self._build_hf_headers(token=token),
        json=changes,
    )
    hf_raise_for_status(r)

    # The updated collection is nested under the "data" key of the response.
    updated = r.json()["data"]
    return Collection(**dict(updated, endpoint=self.endpoint))
def delete_collection(
    self,
    collection_slug: str,
    *,
    missing_ok: bool = False,
    token: bool | str | None = None,
) -> None:
    """Delete a collection on the Hub.

    Args:
        collection_slug (`str`):
            Slug of the collection to delete. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        missing_ok (`bool`, *optional*):
            If `True`, do not raise an error if collection doesn't exist.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:

    ```py
    >>> from huggingface_hub import delete_collection
    >>> delete_collection("username/useless-collection-64f9a55bb3115b4f513ec026", missing_ok=True)
    ```

    > [!WARNING]
    > This is a non-revertible action. A deleted collection cannot be restored.
    """
    session = get_session()
    collection_url = f"{self.endpoint}/api/collections/{collection_slug}"
    r = session.delete(collection_url, headers=self._build_hf_headers(token=token))
    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as err:
        # A missing collection (HTTP 404) is silently accepted only when `missing_ok=True`.
        if not (missing_ok and err.response.status_code == 404):
            raise
def add_collection_item(
    self,
    collection_slug: str,
    item_id: str,
    item_type: CollectionItemType_T,
    *,
    note: str | None = None,
    exists_ok: bool = False,
    token: bool | str | None = None,
) -> Collection:
    """Add an item to a collection on the Hub.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_id (`str`):
            Id of the item to add to the collection. Use the repo_id for repos/spaces/datasets,
            the paper id for papers, the slug of another collection (e.g. `"moonshotai/kimi-k2"`)
            or a bucket id (e.g. `"namespace/bucket-name"`).
        item_type (`str`):
            Type of the item to add. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"`, `"collection"`
            or `"bucket"`.
        note (`str`, *optional*):
            A note to attach to the item in the collection. The maximum size for a note is 500 characters.
        exists_ok (`bool`, *optional*):
            If `True`, do not raise an error if item already exists.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Raises:
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HfHubHTTPError`]:
            HTTP 404 if the item you try to add to the collection does not exist on the Hub.
        [`HfHubHTTPError`]:
            HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False)

    Example:

    ```py
    >>> from huggingface_hub import add_collection_item
    >>> collection = add_collection_item(
    ...     collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab",
    ...     item_id="pierre-loic/climate-news-articles",
    ...     item_type="dataset"
    ... )
    >>> collection.items[-1].item_id
    "pierre-loic/climate-news-articles"
    # ^item got added to the collection on last position

    # Add item with a note
    >>> add_collection_item(
    ...     collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab",
    ...     item_id="datasets/climate_fever",
    ...     item_type="dataset",
    ...     note="This dataset adopts the FEVER methodology that consists of 1,535 real-world claims regarding climate-change collected on the internet."
    ... )
    (...)
    ```
    """
    body: dict[str, Any] = {"item": {"id": item_id, "type": item_type}}
    if note is not None:
        body["note"] = note

    session = get_session()
    r = session.post(
        f"{self.endpoint}/api/collections/{collection_slug}/items",
        headers=self._build_hf_headers(token=token),
        json=body,
    )
    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as err:
        if not (exists_ok and err.response.status_code == 409):
            raise
        # Item already exists and `exists_ok=True`: return the collection as it currently is.
        return self.get_collection(collection_slug, token=token)

    return Collection(**dict(r.json(), endpoint=self.endpoint))
def update_collection_item(
    self,
    collection_slug: str,
    item_object_id: str,
    *,
    note: str | None = None,
    position: int | None = None,
    token: bool | str | None = None,
) -> None:
    """Update an item in a collection.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_object_id (`str`):
            ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id).
            It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`.
        note (`str`, *optional*):
            A note to attach to the item in the collection. The maximum size for a note is 500 characters.
        position (`int`, *optional*):
            New position of the item in the collection.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:

    ```py
    >>> from huggingface_hub import get_collection, update_collection_item

    # Get collection first
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")

    # Update item based on its ID (add note + update position)
    >>> update_collection_item(
    ...     collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026",
    ...     item_object_id=collection.items[-1].item_object_id,
    ...     note="Newly updated model!",
    ...     position=0,
    ... )
    ```
    """
    # Only send not-none values to the API
    changes = {
        field: value
        for field, value in (("position", position), ("note", note))
        if value is not None
    }

    session = get_session()
    r = session.patch(
        f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}",
        headers=self._build_hf_headers(token=token),
        json=changes,
    )
    hf_raise_for_status(r)
def delete_collection_item(
    self,
    collection_slug: str,
    item_object_id: str,
    *,
    missing_ok: bool = False,
    token: bool | str | None = None,
) -> None:
    """Remove an item from a collection.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_object_id (`str`):
            ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id).
            It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`.
        missing_ok (`bool`, *optional*):
            If `True`, an HTTP 404 response (item not found) is silently ignored instead of raising.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HfHubHTTPError`]:
            If the request fails, except for an HTTP 404 when `missing_ok=True`.

    Example:

    ```py
    >>> from huggingface_hub import get_collection, delete_collection_item

    # Get collection first
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")

    # Delete item based on its ID
    >>> delete_collection_item(
    ...     collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026",
    ...     item_object_id=collection.items[-1].item_object_id,
    ... )
    ```
    """
    item_url = f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}"
    response = get_session().delete(item_url, headers=self._build_hf_headers(token=token))
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        # Swallow the error only when the caller tolerates a missing item AND the Hub answered 404.
        if not missing_ok or error.response.status_code != 404:
            raise
- ##########################
- # Manage access requests #
- ##########################
@validate_hf_hub_args
def list_pending_access_requests(
    self, repo_id: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> Iterable[AccessRequest]:
    """
    Iterate over the pending access requests of a gated repo.

    A pending request means the user has requested access to the repo but the request has not been processed yet.
    If the approval mode is automatic, this list should be empty. Pending requests can be accepted or rejected
    using [`accept_access_request`] and [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each item contains a `username`,
        `email`, `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute
        will be populated with user's answers.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_pending_access_requests, accept_access_request

    # List pending requests
    >>> requests = list(list_pending_access_requests("meta-llama/Llama-2-7b"))
    >>> len(requests)
    411
    >>> requests[0]
    AccessRequest(
        username='clem',
        fullname='Clem 🤗',
        email='***',
        timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
        status='pending',
        fields=None,
    )

    # Accept Clem's request
    >>> accept_access_request("meta-llama/Llama-2-7b", "clem")
    ```
    """
    # This is a generator: the underlying listing is only started on first iteration.
    pending_requests = self._list_access_requests(
        repo_id=repo_id, status="pending", repo_type=repo_type, token=token
    )
    yield from pending_requests
@validate_hf_hub_args
def list_accepted_access_requests(
    self, repo_id: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> Iterable[AccessRequest]:
    """
    Iterate over the accepted access requests of a gated repo.

    An accepted request means the user has requested access to the repo and the request has been accepted. The
    user can download any file of the repo. If the approval mode is automatic, this list should contain by default
    all requests. Accepted requests can be cancelled or rejected at any time using [`cancel_access_request`] and
    [`reject_access_request`]. A cancelled request will go back to the pending list while a rejected request will
    go to the rejected list. In both cases, the user will lose access to the repo.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each item contains a `username`,
        `email`, `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute
        will be populated with user's answers.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_accepted_access_requests

    >>> requests = list(list_accepted_access_requests("meta-llama/Llama-2-7b"))
    >>> len(requests)
    411
    >>> requests[0]
    AccessRequest(
        username='clem',
        fullname='Clem 🤗',
        email='***',
        timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
        status='accepted',
        fields=None,
    )
    ```
    """
    # This is a generator: the underlying listing is only started on first iteration.
    accepted_requests = self._list_access_requests(
        repo_id=repo_id, status="accepted", repo_type=repo_type, token=token
    )
    yield from accepted_requests
@validate_hf_hub_args
def list_rejected_access_requests(
    self, repo_id: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> Iterable[AccessRequest]:
    """
    Iterate over the rejected access requests of a gated repo.

    A rejected request means the user has requested access to the repo and the request has been explicitly
    rejected by a repo owner (either you or another user from your organization). The user cannot download any
    file of the repo. Rejected requests can be accepted or cancelled at any time using [`accept_access_request`]
    and [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request
    will go to the accepted list.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each item contains a `username`,
        `email`, `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute
        will be populated with user's answers.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_rejected_access_requests

    >>> requests = list(list_rejected_access_requests("meta-llama/Llama-2-7b"))
    >>> len(requests)
    411
    >>> requests[0]
    AccessRequest(
        username='clem',
        fullname='Clem 🤗',
        email='***',
        timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
        status='rejected',
        fields=None,
    )
    ```
    """
    # This is a generator: the underlying listing is only started on first iteration.
    rejected_requests = self._list_access_requests(
        repo_id=repo_id, status="rejected", repo_type=repo_type, token=token
    )
    yield from rejected_requests
def _list_access_requests(
    self,
    repo_id: str,
    status: Literal["accepted", "rejected", "pending"],
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> Iterable[AccessRequest]:
    """Yield the access requests of a gated repo that are in the given `status`.

    Shared implementation behind the public `list_*_access_requests` methods.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        status (`Literal["accepted", "rejected", "pending"]`):
            Which list of requests to fetch; used as the last segment of the request URL.
        repo_type (`str`, *optional*):
            The type of the repo. Defaults to `constants.REPO_TYPE_MODEL` when `None`.
        token (`bool` or `str`, *optional*):
            Forwarded to `self._build_hf_headers` to build the request headers.

    Yields:
        [`AccessRequest`]: one object per entry returned by `paginate`.

    Raises:
        `ValueError`: If `repo_type` is not one of `constants.REPO_TYPES`.
    """
    # Resolve the default first so the validation below does not depend on `None` being an accepted value.
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # Use the endpoint configured on this client (as the collection methods do) rather than the global default,
    # so that a client created with a custom endpoint is honored.
    requests_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/{status}"
    for request in paginate(requests_url, params={}, headers=self._build_hf_headers(token=token)):
        requester = request["user"]
        yield AccessRequest(
            username=requester["user"],
            fullname=requester["fullname"],
            email=requester.get("email"),  # may be absent from the payload
            status=request["status"],
            timestamp=parse_datetime(request["timestamp"]),
            fields=request.get("fields"),  # only if custom fields in form
        )
@validate_hf_hub_args
def cancel_access_request(
    self, repo_id: str, user: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> None:
    """
    Cancel a user's access request for a gated repo.

    A cancelled request will go back to the pending list and the user will lose access to the repo.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to cancel access request for.
        user (`str`):
            The username of the user whose access request should be cancelled.
        repo_type (`str`, *optional*):
            The type of the repo to cancel access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HfHubHTTPError`]:
            HTTP 404 if the user does not exist on the Hub.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request cannot be found.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request is already in the pending list.
    """
    # Cancelling is expressed as moving the request to the "pending" status.
    self._handle_access_request(
        repo_id=repo_id,
        user=user,
        status="pending",
        repo_type=repo_type,
        token=token,
    )
@validate_hf_hub_args
def accept_access_request(
    self, repo_id: str, user: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> None:
    """
    Accept a user's access request for a gated repo.

    Once the request is accepted, the user will be able to download any file of the repo and access the community
    tab. If the approval mode is automatic, you don't have to accept requests manually. An accepted request can be
    cancelled or rejected at any time using [`cancel_access_request`] and [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to accept access request for.
        user (`str`):
            The username of the user whose access request should be accepted.
        repo_type (`str`, *optional*):
            The type of the repo to accept access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HfHubHTTPError`]:
            HTTP 404 if the user does not exist on the Hub.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request cannot be found.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request is already in the accepted list.
    """
    self._handle_access_request(
        repo_id=repo_id,
        user=user,
        status="accepted",
        repo_type=repo_type,
        token=token,
    )
@validate_hf_hub_args
def reject_access_request(
    self,
    repo_id: str,
    user: str,
    *,
    repo_type: str | None = None,
    rejection_reason: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Reject a user's access request for a gated repo.

    A rejected request will go to the rejected list. The user cannot download any file of the repo. Rejected
    requests can be accepted or cancelled at any time using [`accept_access_request`] and
    [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request will
    go to the accepted list.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to reject access request for.
        user (`str`):
            The username of the user whose access request should be rejected.
        repo_type (`str`, *optional*):
            The type of the repo to reject access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        rejection_reason (`str`, *optional*):
            Optional rejection reason that will be visible to the user (max 200 characters).
            Defaults to `None` (no reason sent).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HfHubHTTPError`]:
            HTTP 404 if the user does not exist on the Hub.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request cannot be found.
        [`HfHubHTTPError`]:
            HTTP 404 if the user access request is already in the rejected list.
    """
    # `rejection_reason` now defaults to `None`, matching its documented "optional" status; previously the
    # keyword had no default and omitting it raised a `TypeError`.
    self._handle_access_request(
        repo_id, user, "rejected", repo_type=repo_type, rejection_reason=rejection_reason, token=token
    )
@validate_hf_hub_args
def _handle_access_request(
    self,
    repo_id: str,
    user: str,
    status: Literal["accepted", "rejected", "pending"],
    repo_type: str | None = None,
    rejection_reason: str | None = None,
    token: bool | str | None = None,
) -> None:
    """Move a user's access request for a gated repo to the given `status`.

    Shared implementation behind `accept_access_request`, `reject_access_request` and `cancel_access_request`.

    Args:
        repo_id (`str`):
            The id of the gated repo.
        user (`str`):
            The username of the user whose access request is handled.
        status (`Literal["accepted", "rejected", "pending"]`):
            The status to send for the request.
        repo_type (`str`, *optional*):
            The type of the repo. Defaults to `constants.REPO_TYPE_MODEL` when `None`.
        rejection_reason (`str`, *optional*):
            Reason sent along with a rejection. Only allowed when `status` is `"rejected"`.
        token (`bool` or `str`, *optional*):
            Forwarded to `self._build_hf_headers` to build the request headers.

    Raises:
        `ValueError`: If `repo_type` is not one of `constants.REPO_TYPES`, or if `rejection_reason` is passed
            with a `status` other than `"rejected"`.
    """
    # Resolve the default first so the validation below does not depend on `None` being an accepted value.
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    payload = {"user": user, "status": status}
    if rejection_reason is not None:
        if status != "rejected":
            raise ValueError("`rejection_reason` can only be passed when rejecting an access request.")
        payload["rejectionReason"] = rejection_reason

    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/handle",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def grant_access(
    self, repo_id: str, user: str, *, repo_type: str | None = None, token: bool | str | None = None
) -> None:
    """
    Grant access to a user for a given gated repo.

    Granting access does not require the user to send an access request themselves. The user is automatically
    added to the accepted list, meaning they can download the files. You can revoke the granted access at any time
    using [`cancel_access_request`] or [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to grant access to.
        user (`str`):
            The username of the user to grant access.
        repo_type (`str`, *optional*):
            The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        `ValueError`:
            If `repo_type` is not one of `constants.REPO_TYPES`.
        [`HfHubHTTPError`]:
            HTTP 400 if the repo is not gated.
        [`HfHubHTTPError`]:
            HTTP 400 if the user already has access to the repo.
        [`HfHubHTTPError`]:
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HfHubHTTPError`]:
            HTTP 404 if the user does not exist on the Hub.
    """
    # Resolve the default first so the validation below does not depend on `None` being an accepted value.
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/grant",
        headers=self._build_hf_headers(token=token),
        json={"user": user},
    )
    hf_raise_for_status(response)
    # NOTE(review): the signature is annotated `-> None` but the decoded JSON body is returned. Kept as-is so
    # existing callers relying on the value are not broken; confirm which contract is intended.
    return response.json()
- ###################
- # Manage webhooks #
- ###################
@validate_hf_hub_args
def get_webhook(self, webhook_id: str, *, token: bool | str | None = None) -> WebhookInfo:
    """Get a webhook by its id.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to get.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the webhook.

    Example:
        ```python
        >>> from huggingface_hub import get_webhook
        >>> webhook = get_webhook("654bbbc16f2ec14d77f109cc")
        >>> print(webhook)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
            domains=["repo", "discussion"],
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().get(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # The webhook description is nested under the "webhook" key of the response body.
    webhook_data = response.json()["webhook"]
    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]
    return WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        # "url", "job" and "secret" are read with `.get`, so they may be missing from the payload.
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )
@validate_hf_hub_args
def list_webhooks(self, *, token: bool | str | None = None) -> list[WebhookInfo]:
    """List all configured webhooks.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `list[WebhookInfo]`:
            List of webhook info objects.

    Example:
        ```python
        >>> from huggingface_hub import list_webhooks
        >>> webhooks = list_webhooks()
        >>> len(webhooks)
        2
        >>> webhooks[0]
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
            domains=["repo", "discussion"],
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().get(
        f"{self.endpoint}/api/settings/webhooks",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # Here the response body is iterated directly: each element describes one webhook.
    webhooks_data = response.json()
    return [
        WebhookInfo(
            id=webhook["id"],
            url=webhook.get("url"),
            job=JobSpec(**webhook["job"]) if webhook.get("job") else None,
            watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]],
            domains=webhook["domains"],
            secret=webhook.get("secret"),
            disabled=webhook["disabled"],
        )
        for webhook in webhooks_data
    ]
@validate_hf_hub_args
def create_webhook(
    self,
    *,
    url: str | None = None,
    job_id: str | None = None,
    watched: list[dict | WebhookWatchedItem],
    domains: list[constants.WEBHOOK_DOMAIN_T] | None = None,
    secret: str | None = None,
    token: bool | str | None = None,
) -> WebhookInfo:
    """Create a new webhook.

    The webhook can either send a payload to a URL, or trigger a Job to run on Hugging Face infrastructure.
    This function should be called with one of `url` or `job_id`, but not both.

    Args:
        url (`str`):
            URL to send the payload to.
        job_id (`str`):
            ID of the source Job to trigger with the webhook payload in the environment variable WEBHOOK_PAYLOAD.
            Additional environment variables are available for convenience: WEBHOOK_REPO_ID, WEBHOOK_REPO_TYPE and WEBHOOK_SECRET.
        watched (`list[WebhookWatchedItem]`):
            List of [`WebhookWatchedItem`] to be watched by the webhook. It can be users, orgs, models, datasets or spaces.
            Watched items can also be provided as plain dictionaries.
        domains (`list[Literal["repo", "discussion"]]`, optional):
            List of domains to watch. It can be "repo", "discussion" or both.
        secret (`str`, optional):
            A secret to sign the payload with.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the newly created webhook.

    Raises:
        `ValueError`:
            If both `url` and `job_id` are set, or if neither is set.

    Example:

        Create a webhook that sends a payload to a URL
        ```python
        >>> from huggingface_hub import create_webhook
        >>> payload = create_webhook(
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
        ...     domains=["repo", "discussion"],
        ...     secret="my-secret",
        ... )
        >>> print(payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```

        Run a Job and then create a webhook that triggers this Job
        ```python
        >>> from huggingface_hub import create_webhook, run_job
        >>> job = run_job(
        ...     image="ubuntu",
        ...     command=["bash", "-c", r"echo An event occurred in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD"],
        ... )
        >>> payload = create_webhook(
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     job_id=job.id,
        ...     domains=["repo", "discussion"],
        ...     secret="my-secret",
        ... )
        >>> print(payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url=None,
            job=JobSpec(
                docker_image='ubuntu',
                space_id=None,
                command=['bash', '-c', 'echo An event occurred in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD'],
                arguments=[],
                environment={},
                secrets=[],
                flavor='cpu-basic',
                timeout=None,
                tags=None,
                arch=None
            ),
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # Fail fast on the target: exactly one of `url` / `job_id` must be provided.
    if url is not None and job_id is not None:
        raise ValueError("Set `url` or `job_id` but not both.")
    if url is None and job_id is None:
        raise ValueError("Missing argument for webhook: `url` or `job_id`.")

    # Dataclass items are converted to plain dicts; dict items are sent unchanged.
    watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched]
    post_webhooks_json: dict = {"watched": watched_dicts}
    if domains is not None:
        post_webhooks_json["domains"] = domains
    if secret is not None:
        post_webhooks_json["secret"] = secret
    if url is not None:
        post_webhooks_json["url"] = url
    else:
        # The job id is sent under the "jobSourceId" key.
        post_webhooks_json["jobSourceId"] = job_id

    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks",
        json=post_webhooks_json,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    webhook_data = response.json()["webhook"]
    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]
    return WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )
@validate_hf_hub_args
def update_webhook(
    self,
    webhook_id: str,
    *,
    url: str | None = None,
    watched: list[dict | WebhookWatchedItem] | None = None,
    domains: list[constants.WEBHOOK_DOMAIN_T] | None = None,
    secret: str | None = None,
    token: bool | str | None = None,
) -> WebhookInfo:
    """Update an existing webhook.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to be updated.
        url (`str`, optional):
            The URL to which the payload will be sent.
        watched (`list[WebhookWatchedItem]`, optional):
            List of items to watch. It can be users, orgs, models, datasets, or spaces.
            Refer to [`WebhookWatchedItem`] for more details. Watched items can also be provided as plain dictionaries.
            When omitted, an empty list is sent in the request.
        domains (`list[Literal["repo", "discussion"]]`, optional):
            The domains to watch. This can include "repo", "discussion", or both.
        secret (`str`, optional):
            A secret to sign the payload with, providing an additional layer of security.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the updated webhook.

    Example:
        ```python
        >>> from huggingface_hub import update_webhook
        >>> updated_payload = update_webhook(
        ...     webhook_id="654bbbc16f2ec14d77f109cc",
        ...     url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     domains=["repo"],
        ...     secret="my-secret",
        ... )
        >>> print(updated_payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # NOTE(review): "watched" is always sent, as an empty list when the caller omits it. Whether the server
    # treats an empty list as "clear watched items" cannot be determined from here; confirm before changing.
    if watched is None:
        watched = []
    watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched]

    # The remaining fields are only included when explicitly provided.
    update_json: dict = {"watched": watched_dicts}
    if url is not None:
        update_json["url"] = url
    if domains is not None:
        update_json["domains"] = domains
    if secret is not None:
        update_json["secret"] = secret

    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        json=update_json,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    webhook_data = response.json()["webhook"]
    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]
    return WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )
@validate_hf_hub_args
def enable_webhook(self, webhook_id: str, *, token: bool | str | None = None) -> WebhookInfo:
    """Enable a webhook (makes it "active").

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to enable.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the enabled webhook.

    Example:
        ```python
        >>> from huggingface_hub import enable_webhook
        >>> enabled_webhook = enable_webhook("654bbbc16f2ec14d77f109cc")
        >>> enabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client (as the collection methods do) rather than the global default.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}/enable",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # The webhook description is nested under the "webhook" key of the response body.
    webhook_data = response.json()["webhook"]
    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]
    return WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )
@validate_hf_hub_args
def disable_webhook(self, webhook_id: str, *, token: bool | str | None = None) -> WebhookInfo:
    """Disable a webhook (makes it "disabled").

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to disable.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the disabled webhook.

    Example:
        ```python
        >>> from huggingface_hub import disable_webhook
        >>> disabled_webhook = disable_webhook("654bbbc16f2ec14d77f109cc")
        >>> disabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=True,
        )
        ```
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}/disable",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # The server wraps the webhook description under the "webhook" key.
    webhook_data = response.json()["webhook"]
    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    # "url", "job" and "secret" are read with `.get` because they may be absent from the payload.
    return WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )
@validate_hf_hub_args
def delete_webhook(self, webhook_id: str, *, token: bool | str | None = None) -> None:
    """Delete a webhook.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to delete.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `None`

    Example:
        ```python
        >>> from huggingface_hub import delete_webhook
        >>> delete_webhook("654bbbc16f2ec14d77f109cc")
        ```
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    response = get_session().delete(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        headers=self._build_hf_headers(token=token),
    )
    # Nothing is returned on success; a failed request surfaces as an exception.
    hf_raise_for_status(response)
- #############
- # Internals #
- #############
def _build_hf_headers(
    self,
    token: bool | str | None = None,
    library_name: str | None = None,
    library_version: str | None = None,
    user_agent: dict | str | None = None,
) -> dict[str, str]:
    """
    Thin wrapper around [`build_hf_headers`] that falls back to the values stored on
    this [`HfApi`] client (`token`, `library_name`, `library_version`, `user_agent`)
    for every argument that is not provided. The client's `headers` are always forwarded.
    """
    # `False` is a meaningful token value, so only an explicit `None` triggers the
    # fallback (a plain `token or self.token` would wrongly override `False`).
    effective_token = self.token if token is None else token
    effective_library_name = library_name or self.library_name
    effective_library_version = library_version or self.library_version
    effective_user_agent = user_agent or self.user_agent
    return build_hf_headers(
        token=effective_token,
        library_name=effective_library_name,
        library_version=effective_library_version,
        user_agent=effective_user_agent,
        headers=self.headers,
    )
def _prepare_folder_deletions(
    self,
    repo_id: str,
    repo_type: str | None,
    revision: str | None,
    path_in_repo: str,
    delete_patterns: list[str] | str | None,
    token: bool | str | None = None,
) -> list[CommitOperationDelete]:
    """Build the Delete operations needed to remove remote files matching `delete_patterns`.

    Remote files are listed, restricted to `path_in_repo` when it designates a subfolder, and
    matched against `delete_patterns` using their path relative to that folder. One
    [`CommitOperationDelete`] is returned per matching file.

    Note: the root `.gitattributes` file is never scheduled for deletion, even when it
    matches `delete_patterns`.
    """
    # Without patterns there is nothing to delete: skip the remote listing entirely.
    if delete_patterns is None:
        return []

    remote_files = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)

    # Map "path relative to the target folder" -> "full path in the repo".
    targets_subfolder = bool(path_in_repo) and path_in_repo not in (".", "./")
    if targets_subfolder:
        folder_prefix = path_in_repo.strip("/") + "/"  # normalized to "<folder>/"
        prefix_length = len(folder_prefix)
        relative_to_full = {}
        for remote_path in remote_files:
            if remote_path.startswith(folder_prefix):
                relative_to_full[remote_path[prefix_length:]] = remote_path
    else:
        relative_to_full = {remote_path: remote_path for remote_path in remote_files}

    # Patterns are evaluated on relative paths; operations use the full repo path.
    deletions = []
    for relative_path in filter_repo_objects(relative_to_full.keys(), allow_patterns=delete_patterns):
        full_path = relative_to_full[relative_path]
        if full_path == ".gitattributes":
            continue
        deletions.append(CommitOperationDelete(path_in_repo=full_path, is_folder=False))
    return deletions
def _prepare_upload_folder_additions(
    self,
    folder_path: str | Path,
    path_in_repo: str,
    allow_patterns: list[str] | str | None = None,
    ignore_patterns: list[str] | str | None = None,
    repo_type: str | None = None,
    token: bool | str | None = None,
) -> list[CommitOperationAdd]:
    """Build the Add operations needed to upload the content of a local folder.

    Every file found under `folder_path` is considered. Files rejected by `allow_patterns`
    (allowlist) or `ignore_patterns` (denylist) are dropped. Patterns are matched against the
    path relative to `folder_path`; `path_in_repo` is only prepended afterwards.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `folder_path` is not a directory.
    """
    folder_path = Path(folder_path).expanduser().resolve()
    if not folder_path.is_dir():
        raise ValueError(f"Provided path: '{folder_path}' is not a directory")

    # Collect local files, keyed by their POSIX path relative to the folder.
    # Sorting makes the resulting order deterministic.
    local_files = {}
    for candidate in sorted(folder_path.glob("**/*")):
        if candidate.is_file():
            local_files[candidate.relative_to(folder_path).as_posix()] = candidate

    # Apply allow/ignore patterns on the relative paths.
    selected = list(
        filter_repo_objects(local_files.keys(), allow_patterns=allow_patterns, ignore_patterns=ignore_patterns)
    )

    if path_in_repo:
        repo_prefix = f"{path_in_repo.strip('/')}/"
    else:
        repo_prefix = ""

    # Validate the README metadata up front: failing now is cheaper than failing
    # once every file has been hashed.
    if "README.md" in selected:
        self._validate_yaml(
            content=local_files["README.md"].read_text(encoding="utf8"),
            repo_type=repo_type,
            token=token,
        )

    # Nudge users towards the large-folder workflow: info above 30 files, warning above 200.
    if len(selected) > 30:
        emit = logger.warning if len(selected) > 200 else logger.info
        emit(
            "It seems you are trying to upload a large folder at once. This might take some time and then fail if "
            "the folder is too large. For such cases, it is recommended to upload in smaller batches or to use "
            "`HfApi().upload_large_folder(...)`/`hf upload-large-folder` instead. For more details, "
            "check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder."
        )

    logger.info(f"Start hashing {len(selected)} files.")
    additions = []
    for relative_path in selected:
        additions.append(
            CommitOperationAdd(
                path_or_fileobj=local_files[relative_path],  # location on disk
                path_in_repo=repo_prefix + relative_path,  # destination in the repo
            )
        )
    logger.info(f"Finished hashing {len(selected)} files.")
    return additions
def _validate_yaml(self, content: str, *, repo_type: str | None = None, token: bool | str | None = None):
    """
    Ask the Hub to validate the YAML metadata of a `README.md` before hashing and uploading files.

    Args:
        content (`str`):
            Content of `README.md` to validate.
        repo_type (`str`, *optional*):
            Type of the repo the README belongs to. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if YAML is invalid
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    request_headers = self._build_hf_headers(token=token)
    response = get_session().post(
        f"{self.endpoint}/api/validate-yaml",
        json={"content": content, "repoType": repo_type},
        headers=request_headers,
    )

    # The body is parsed before the status check: it carries both warnings and errors.
    payload = response.json()

    # Surface server-side warnings (example: empty metadata) without failing.
    warning_lines = [f"- {warning.get('message')}" for warning in payload.get("warnings", [])]
    if warning_lines:
        message = "\n".join(warning_lines)
        warnings.warn(f"Warnings while validating metadata in README.md:\n{message}")

    # A bad request means the metadata is invalid: re-raise with the server's error list.
    try:
        hf_raise_for_status(response)
    except BadRequestError as e:
        error_lines = [f"- {error.get('message')}" for error in payload.get("errors", [])]
        message = "\n".join(error_lines)
        raise ValueError(f"Invalid metadata in README.md.\n{message}") from e
def get_user_overview(self, username: str, token: bool | str | None = None) -> User:
    """
    Get an overview of a user on the Hub.

    Args:
        username (`str`):
            Username of the user to get an overview of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `User`: A [`User`] object with the user's overview.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the user does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    r = get_session().get(
        f"{self.endpoint}/api/users/{username}/overview", headers=self._build_hf_headers(token=token)
    )
    hf_raise_for_status(r)
    # The JSON payload is expanded as keyword arguments of `User`.
    return User(**r.json())
@validate_hf_hub_args
def get_organization_overview(self, organization: str, token: bool | str | None = None) -> Organization:
    """
    Get an overview of an organization on the Hub.

    Args:
        organization (`str`):
            Name of the organization to get an overview of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended method
            for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Organization`: An [`Organization`] object with the organization's overview.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the organization does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    r = get_session().get(
        f"{self.endpoint}/api/organizations/{organization}/overview",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(r)
    # The JSON payload is expanded as keyword arguments of `Organization`.
    return Organization(**r.json())
@validate_hf_hub_args
def list_organization_followers(self, organization: str, token: bool | str | None = None) -> Iterable[User]:
    """
    List followers of an organization on the Hub.

    Args:
        organization (`str`):
            Name of the organization to get the followers of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the followers of the organization.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the organization does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    # This is a generator: items are yielded as `paginate` produces them.
    for follower in paginate(
        path=f"{self.endpoint}/api/organizations/{organization}/followers",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**follower)
def list_organization_members(self, organization: str, token: bool | str | None = None) -> Iterable[User]:
    """
    List the members of an organization on the Hub.

    Args:
        organization (`str`):
            Name of the organization to get the members of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the members of the organization.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the organization does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    # This is a generator: items are yielded as `paginate` produces them.
    for member in paginate(
        path=f"{self.endpoint}/api/organizations/{organization}/members",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**member)
def list_user_followers(self, username: str, token: bool | str | None = None) -> Iterable[User]:
    """
    Get the list of followers of a user on the Hub.

    Args:
        username (`str`):
            Username of the user to get the followers of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the followers of the user.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the user does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    # This is a generator: items are yielded as `paginate` produces them.
    for follower in paginate(
        path=f"{self.endpoint}/api/users/{username}/followers",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**follower)
def list_user_following(self, username: str, token: bool | str | None = None) -> Iterable[User]:
    """
    Get the list of users followed by a user on the Hub.

    Args:
        username (`str`):
            Username of the user whose followed users are listed.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the users followed by the user.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the user does not exist on the Hub.
    """
    # Target the endpoint configured on this client rather than the global default,
    # so that a client created with a custom endpoint talks to the right server.
    # This is a generator: items are yielded as `paginate` produces them.
    for followed_user in paginate(
        path=f"{self.endpoint}/api/users/{username}/following",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**followed_user)
def list_papers(
    self,
    *,
    query: str | None = None,
    limit: int | None = None,
    token: bool | str | None = None,
) -> Iterable[PaperInfo]:
    """
    Search papers on the Hugging Face Hub.

    Args:
        query (`str`, *optional*):
            A search query string to find papers.
            If provided, returns papers that match the query.
        limit (`int`, *optional*):
            The maximum number of papers to return.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects.

    Example:
        ```python
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()

        # Search papers matching "attention"
        >>> list(api.list_papers(query="attention"))
        ```
    """
    search_url = f"{self.endpoint}/api/papers/search"

    # Only send the filters that were actually provided.
    search_params: dict[str, Any] = {}
    if query:
        search_params["q"] = query
    if limit is not None:
        search_params["limit"] = limit

    response = get_session().get(
        search_url,
        params=search_params,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # Generator: the request above only runs once iteration starts.
    for paper_payload in response.json():
        yield PaperInfo(**paper_payload)
def paper_info(self, id: str) -> PaperInfo:
    """
    Get information for a paper on the Hub.

    Args:
        id (`str`):
            ArXiv id of the paper.

    Returns:
        `PaperInfo`: A `PaperInfo` object.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the paper does not exist on the Hub.
    """
    paper_url = f"{self.endpoint}/api/papers/{id}"
    # The request is sent without any extra headers.
    response = get_session().get(paper_url)
    hf_raise_for_status(response)
    payload = response.json()
    return PaperInfo(**payload)
def read_paper(self, id: str) -> str:
    """
    Get the markdown content of a paper page on the Hub.

    Args:
        id (`str`):
            ArXiv id of the paper.

    Returns:
        `str`: The paper page content as markdown.

    Raises:
        [`HfHubHTTPError`]:
            HTTP 404 If the paper does not exist on the Hub.
    """
    # The markdown rendering lives under `/papers/<id>.md`, not under `/api`.
    markdown_url = f"{self.endpoint}/papers/{id}.md"
    # The request is sent without any extra headers.
    response = get_session().get(markdown_url)
    hf_raise_for_status(response)
    return response.text
def list_daily_papers(
    self,
    *,
    date: str | None = None,
    token: bool | str | None = None,
    week: str | None = None,
    month: str | None = None,
    submitter: str | None = None,
    sort: DailyPapersSort_T | None = None,
    p: int | None = None,
    limit: int | None = None,
) -> Iterable[PaperInfo]:
    """
    List the daily papers published on the Hugging Face Hub, optionally filtered by date, week,
    month or submitter.

    Args:
        date (`str`, *optional*):
            Date in ISO format (YYYY-MM-DD) for which to fetch daily papers.
            Defaults to most recent ones.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token. To disable authentication, pass `False`.
        week (`str`, *optional*):
            Week in ISO format (YYYY-Www) for which to fetch daily papers. Example, `2025-W09`.
        month (`str`, *optional*):
            Month in ISO format (YYYY-MM) for which to fetch daily papers. Example, `2025-02`.
        submitter (`str`, *optional*):
            Username of the submitter to filter daily papers.
        sort (`Literal["publishedAt", "trending"]`, *optional*):
            Sort order for the daily papers. Can be either by `publishedAt` or by `trending`.
            Defaults to `"publishedAt"`
        p (`int`, *optional*):
            Page number for pagination. Defaults to 0.
        limit (`int`, *optional*):
            Limit of papers to fetch. Defaults to 50.

    Returns:
        `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects.

    Example:
        ```python
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> list(api.list_daily_papers(date="2025-10-29"))
        ```
    """
    daily_papers_url = f"{self.endpoint}/api/daily_papers"

    # Forward only the filters that were explicitly set; anything left to `None`
    # is omitted from the query string.
    candidate_params = (
        ("p", p),
        ("limit", limit),
        ("sort", sort),
        ("date", date),
        ("week", week),
        ("month", month),
        ("submitter", submitter),
    )
    query_params = {}
    for key, value in candidate_params:
        if value is not None:
            query_params[key] = value

    response = get_session().get(
        daily_papers_url, params=query_params, headers=self._build_hf_headers(token=token)
    )
    hf_raise_for_status(response)

    # Generator: the request above only runs once iteration starts.
    for paper_payload in response.json():
        yield PaperInfo(**paper_payload)
def auth_check(
    self,
    repo_id: str,
    *,
    repo_type: str | None = None,
    token: bool | str | None = None,
    write: bool = False,
) -> None:
    """
    Check if the provided user token has access to a specific repository on the Hugging Face Hub.

    The Hub is queried on behalf of the user authenticated by `token`. Nothing is returned when access
    is granted; if the repository is not found or the user lacks the required permissions, an
    exception is raised.

    Args:
        repo_id (`str`):
            The repository to check for access. Format should be `"user/repo_name"`.
            Example: `"user/my-cool-model"`.
        repo_type (`str`, *optional*):
            The type of the repository. Should be one of `"model"`, `"dataset"`, or `"space"`.
            If not specified, the default is `"model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
        write (`bool`, *optional*):
            If `True`, checks whether the user has content write permission on the repository.
            If `False` (default), only checks for read access.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            Raised if the repository does not exist, is private, or the user does not have access. This can
            occur if the `repo_id` or `repo_type` is incorrect or if the repository is private but the user
            is not authenticated.
        [`~utils.GatedRepoError`]:
            Raised if the repository exists but is gated and the user is not authorized to access it.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            Raised if `repo_type` is not a supported repo type.

    Example:
        Check if the user has access to a repository:

        ```python
        >>> from huggingface_hub import auth_check
        >>> from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
        >>> try:
        ...     auth_check("user/my-cool-model")
        ... except GatedRepoError:
        ...     # Handle gated repository error
        ...     print("You do not have permission to access this gated repository.")
        ... except RepositoryNotFoundError:
        ...     # Handle repository not found error
        ...     print("The repository was not found or you do not have access.")
        ```

        In this example:
        - If the user has access, the method completes successfully.
        - If the repository is gated or does not exist, appropriate exceptions are raised, allowing the user
          to handle them accordingly.
    """
    # Headers are built first, before the repo type is validated.
    request_headers = self._build_hf_headers(token=token)

    resolved_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    if resolved_repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # The write-permission check lives under an extra "/write" path segment.
    check_url = f"{self.endpoint}/api/{resolved_repo_type}s/{repo_id}/auth-check"
    if write:
        check_url = f"{check_url}/write"

    response = get_session().get(check_url, headers=request_headers)
    hf_raise_for_status(response)
def run_job(
    self,
    *,
    image: str,
    command: list[str],
    env: dict[str, Any] | None = None,
    secrets: dict[str, Any] | None = None,
    flavor: SpaceHardware | None = None,
    timeout: int | float | str | None = None,
    labels: dict[str, str] | None = None,
    volumes: list[Volume] | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> JobInfo:
    """
    Run compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.
        command (`list[str]`):
            The command to run. Example: `["echo", "hello"]`.
        env (`dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.
        secrets (`dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.
        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.
        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.
        labels (`dict[str, str]`, *optional*):
            Labels to attach to the job (key-value pairs).
        volumes (`list[Volume]`, *optional*):
            Hugging Face Buckets or Repos to mount as volumes in the job container.
            Each volume is a [`Volume`] with `type` (`"bucket"`, `"model"`, `"dataset"`, or `"space"`),
            `source` (e.g. `"username/my-bucket"`), and `mount_path` (e.g. `"/data"`).
        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`JobInfo`]: built from the Hub's response and this client's endpoint.

    Example:
        Run your first Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> run_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"])
        ```

        Run a GPU Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> run_job(image=image, command=command, flavor="a10g-small")
        ```

        Run a Job with volumes:

        ```python
        >>> from huggingface_hub import Volume, run_job
        >>> dataset_volume = Volume(type="dataset", source="HuggingFaceFW/fineweb", mount_path="/data")
        >>> output_bucket_volume = Volume(type="bucket", source="username/my-bucket", mount_path="/output")
        >>> image = "duckdb/duckdb"
        >>> command = ["duckdb", "-c", "COPY (SELECT * FROM '/data/**/*.parquet' LIMIT 5) TO '/output/first-rows.parquet'"]
        >>> run_job(image=image, command=command, volumes=[dataset_volume, output_bucket_volume])
        ```
    """
    # Without an explicit namespace, the job is created under the authenticated user's name.
    target_namespace = namespace if namespace is not None else self.whoami(token=token)["name"]

    spec_payload = _create_job_spec(
        image=image,
        command=command,
        env=env,
        secrets=secrets,
        flavor=flavor,
        timeout=timeout,
        labels=labels,
        volumes=volumes,
    )

    jobs_url = f"{self.endpoint}/api/jobs/{target_namespace}"
    response = get_session().post(
        jobs_url,
        json=spec_payload,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    created_job = response.json()
    return JobInfo(**created_job, endpoint=self.endpoint)
def _fetch_running_job_sse(
    self,
    *,
    job_id: str,
    route: str,
    timeout: int,
    skip_previous_events_on_retry: bool,
    tolerated_status_codes: tuple[int, ...] = (),
    tolerated_exception_types: tuple[type[Exception], ...] = (),
    follow: bool = True,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> Iterable[dict[str, Any]]:
    """
    Yield the events of a Job's `/{route}` stream by delegating to `self._stream_sse_events`.

    The namespace defaults to the authenticated user's name. A status-check callback is passed
    as `on_iteration_end`: it reports `True` once the job's stage is neither `"RUNNING"` nor
    `"UPDATING"`. All other arguments are forwarded unchanged.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]

    def _job_is_over() -> bool:
        # No http_backoff here on purpose: this is the authoritative check that
        # decides whether to keep streaming.
        status_response = get_session().get(
            f"{self.endpoint}/api/jobs/{namespace}/{job_id}",
            headers=self._build_hf_headers(token=token),
        )
        hf_raise_for_status(status_response)
        job_payload = status_response.json()
        # A payload without "status" is treated as "not finished".
        if "status" not in job_payload:
            return False
        return job_payload["status"]["stage"] not in ("RUNNING", "UPDATING")

    stream_url = f"{self.endpoint}/api/jobs/{namespace}/{job_id}/{route}"
    yield from self._stream_sse_events(
        url=stream_url,
        log_label=f"jobs /{route} for {job_id=}",
        timeout=timeout,
        follow=follow,
        token=token,
        skip_previous_events_on_retry=skip_previous_events_on_retry,
        tolerated_status_codes=tolerated_status_codes,
        tolerated_exception_types=tolerated_exception_types,
        on_iteration_end=_job_is_over,
    )
def fetch_job_logs(
    self,
    *,
    job_id: str,
    namespace: str | None = None,
    follow: bool = False,
    token: bool | str | None = None,
) -> Iterable[str]:
    """
    Fetch all the logs from a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.
        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.
        follow (`bool`, *optional*):
            If `True`, stream logs in real-time until the job completes (blocking).
            If `False` (default), fetch only the currently available logs and return immediately (non-blocking).
        token (`bool` or `str`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `Iterable[str]`: the log lines, excluding the "===== Job started" marker line.

    Example:
        ```python
        >>> from huggingface_hub import fetch_job_logs, run_job
        >>> job = run_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"])
        >>> for log in fetch_job_logs(job_id=job.id):
        ...     print(log)
        Hello from HF compute!

        >>> # Non-blocking: fetch only currently available logs
        >>> for log in fetch_job_logs(job_id=job.id, follow=False):
        ...     print(log)
        ```
    """
    # Why the stream is read with retries and timeouts:
    # - /logs sometimes returns nothing useful right after the job started
    #   (for example only two lines: "Job started" and an empty one)
    # - timeouts can happen in case of build errors
    # - ChunkedEncodingError can happen when logging stops in the middle of streaming
    # - a build error can produce an infinite stream that is empty apart from
    #   the "Job started" message
    # - the server sends a ": keep-alive" every 30 seconds
    keep_alive_period = 30

    # When not following, a short timeout is enough: the server replays historical
    # logs quickly, then pauses waiting for new events (~30s keep-alive), so
    # 5 seconds covers all buffered logs.
    if follow:
        read_timeout = 4 * keep_alive_period
    else:
        read_timeout = 5

    log_events = self._fetch_running_job_sse(
        job_id=job_id,
        route="logs",
        timeout=read_timeout,
        skip_previous_events_on_retry=True,
        follow=follow,
        namespace=namespace,
        token=token,
    )
    for log_event in log_events:
        # Skip the "Job started" marker line; only the event's "data" is surfaced.
        if log_event["data"].startswith("===== Job started"):
            continue
        yield log_event["data"]
def fetch_job_metrics(
    self,
    *,
    job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> Iterable[dict[str, Any]]:
    """
    Fetch all the live metrics from a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.
        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `Iterable[dict[str, Any]]`: the events yielded by the Job's metrics stream.

    Example:
        ```python
        >>> from huggingface_hub import fetch_job_metrics, run_job
        >>> job = run_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"], flavor="a10g-small")
        >>> for metrics in fetch_job_metrics(job_id=job.id):
        ...     print(metrics)
        {
            "cpu_usage_pct": 0,
            "cpu_millicores": 3500,
            "memory_used_bytes": 1306624,
            "memory_total_bytes": 15032385536,
            "rx_bps": 0,
            "tx_bps": 0,
            "gpus": {
                "882fa930": {
                    "utilization": 0,
                    "memory_used_bytes": 0,
                    "memory_total_bytes": 22836000000
                }
            },
            "replica": "57vr7"
        }
        ```
    """
    # Notes on the /metrics stream:
    # - one "metric" event is emitted every second, like this:
    #       event: metric
    #       data: {"cpu_usage_pct":0,"cpu_millicores":3500,"memory_used_bytes":1417216,"memory_total_bytes":15032385536,"rx_bps":0,"tx_bps":0,"gpus":{"d901cd7f":{"utilization":0,"memory_used_bytes":0,"memory_total_bytes":22836000000}},"replica":"j6qz9"}
    # - the stream doesn't end when the job finishes, so we rely on timeouts
    #   (httpx.NetworkError with Timeout as cause)
    # - httpx.ReadTimeout can happen if the job is marked as running but the hardware
    #   is not available yet; that can be ignored
    # - an internal error 500 is returned if the job has already finished; it is simply ignored
    # - ChunkedEncodingError can happen when logging stops in the middle of streaming
    # - the server sends a ": keep-alive" every 30 seconds
    metric_period = 1
    metrics_stream = self._fetch_running_job_sse(
        job_id=job_id,
        route="metrics",
        timeout=10 * metric_period,
        skip_previous_events_on_retry=False,
        tolerated_status_codes=(500,),
        namespace=namespace,
        token=token,
    )
    yield from metrics_stream
def list_jobs(
    self,
    *,
    timeout: int | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> list[JobInfo]:
    """
    List compute Jobs on Hugging Face infrastructure.

    Args:
        timeout (`int`, *optional*):
            Value forwarded as the `timeout` of the HTTP request to the Hub.
        namespace (`str`, *optional*):
            The namespace from where it lists the jobs. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `list[JobInfo]`: one [`JobInfo`] per job returned by the Hub for the namespace.
    """
    if namespace is None:
        # Resolve the user through this client (not the module-level `whoami`), so the
        # lookup uses the same client configuration as the request below.
        namespace = self.whoami(token=token)["name"]
    response = get_session().get(
        f"{self.endpoint}/api/jobs/{namespace}",
        headers=self._build_hf_headers(token=token),
        timeout=timeout,
    )
    # NOTE(review): sibling methods use `hf_raise_for_status(response)`. The plain
    # `raise_for_status()` is kept here so the exception type callers may already
    # catch does not change - confirm before aligning.
    response.raise_for_status()
    return [JobInfo(**job_info, endpoint=self.endpoint) for job_info in response.json()]
def list_jobs_hardware(self, token: bool | str | None = None) -> list[JobHardware]:
    """
    Return the hardware options that can be used to run Jobs on Hugging Face infrastructure.

    Args:
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `list[JobHardware]`: A list of available hardware configurations.

    Example:
        ```python
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> hardware_list = api.list_jobs_hardware()
        >>> hardware_list[0]
        JobHardware(name='cpu-basic', pretty_name='CPU Basic', cpu='2 vCPU', ram='16 GB', accelerator=None, unit_cost_micro_usd=167, unit_cost_usd=0.000167, unit_label='minute')
        >>> hardware_list[0].name
        'cpu-basic'

        # Filter GPU options
        >>> gpu_hardware = [hw for hw in hardware_list if hw.accelerator is not None]
        >>> gpu_hardware[0].accelerator.model
        'T4'
        ```
    """
    session = get_session()
    hardware_url = f"{self.endpoint}/api/jobs/hardware"
    response = session.get(hardware_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    # Each JSON entry carries the keyword arguments of one `JobHardware`.
    catalog = []
    for entry in response.json():
        catalog.append(JobHardware(**entry))
    return catalog
def inspect_job(
    self,
    *,
    job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> JobInfo:
    """
    Fetch the description of a single compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.
        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`JobInfo`]: The Job description built from the Hub response.

    Example:
        ```python
        >>> from huggingface_hub import inspect_job, run_job
        >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"])
        >>> inspect_job(job_id=job.id)
        JobInfo(
            id='68780d00bbe36d38803f645f',
            created_at=datetime.datetime(2025, 7, 16, 20, 35, 12, 808000, tzinfo=datetime.timezone.utc),
            docker_image='python:3.12',
            space_id=None,
            command=['python', '-c', "print('Hello from HF compute!')"],
            arguments=[],
            environment={},
            secrets={},
            flavor='cpu-basic',
            status=JobStatus(stage='RUNNING', message=None)
        )
        ```
    """
    # Fall back to the authenticated user's namespace when none is given.
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/jobs/{owner}/{job_id}",
        headers=self._build_hf_headers(token=token),
    )
    response.raise_for_status()
    job_payload = response.json()
    return JobInfo(**job_payload, endpoint=self.endpoint)
def cancel_job(
    self,
    *,
    job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Request the cancellation of a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.
        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/jobs/{owner}/{job_id}/cancel",
        headers=self._build_hf_headers(token=token),
    )
    # Nothing is returned on success; an error status raises.
    response.raise_for_status()
@experimental
def run_uv_job(
    self,
    script: str,
    *,
    script_args: list[str] | None = None,
    dependencies: list[str] | None = None,
    python: str | None = None,
    image: str | None = None,
    env: dict[str, Any] | None = None,
    secrets: dict[str, Any] | None = None,
    flavor: SpaceHardware | None = None,
    timeout: int | float | str | None = None,
    labels: dict[str, str] | None = None,
    volumes: list[Volume] | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> JobInfo:
    """
    Run a UV script Job on Hugging Face infrastructure.

    The `uv run` command, environment and secrets are prepared by `_create_uv_command_env_and_secrets`
    and the Job itself is started through `run_job`.

    Args:
        script (`str`):
            Path or URL of the UV script, or a command.
        script_args (`list[str]`, *optional*)
            Arguments to pass to the script or command.
        dependencies (`list[str]`, *optional*)
            Dependencies to use to run the UV script.
        python (`str`, *optional*)
            Use a specific Python version. Default is 3.12.
        image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"):
            Use a custom Docker image with `uv` installed.
        env (`dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.
        secrets (`dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.
        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.
        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.
        labels (`dict[str, str]`, *optional*):
            Labels to attach to the job (key-value pairs).
        volumes (`list[Volume]`, *optional*):
            Hugging Face Buckets or Repos to mount as volumes in the job container.
            Each volume is a [`Volume`] with `type` (`"bucket"`, `"model"`, `"dataset"`, or `"space"`),
            `source` (e.g. `"username/my-bucket"`), and `mount_path` (e.g. `"/data"`).
        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`JobInfo`]: The value returned by `run_job` for the created Job.

    Example:
        Run a script from a URL:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small")
        ```

        Run a local script:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "my_sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small")
        ```

        Run a command:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "lighteval"
        >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small")
        ```

        Mount volumes, e.g. to save model checkpoints during training:

        ```python
        >>> from huggingface_hub import Volume, run_uv_job
        >>> script = "my_sft.py"
        >>> script_args = ["--output_dir", "/training-outputs/training-v3-final", ...]
        >>> checkpoints_bucket = Volume(type="bucket", source="username/my-bucket", mount_path="/training-outputs")
        >>> run_uv_job(script, script_args=script_args, volumes=[checkpoints_bucket])
        ```
    """
    if not image:
        image = "ghcr.io/astral-sh/uv:python3.12-bookworm"

    # Turn the script (or command) and its arguments into a container command plus its env/secrets.
    job_command, job_env, job_secrets = self._create_uv_command_env_and_secrets(
        script=script,
        script_args=script_args,
        dependencies=dependencies,
        python=python,
        env=env or {},
        secrets=secrets or {},
        namespace=namespace,
        token=token,
    )

    # Everything else is forwarded untouched to the regular Job runner.
    return self.run_job(
        image=image,
        command=job_command,
        env=job_env,
        secrets=job_secrets,
        flavor=flavor,
        timeout=timeout,
        labels=labels,
        volumes=volumes,
        namespace=namespace,
        token=token,
    )
def create_scheduled_job(
    self,
    *,
    image: str,
    command: list[str],
    schedule: str,
    suspend: bool | None = None,
    concurrency: bool | None = None,
    env: dict[str, Any] | None = None,
    secrets: dict[str, Any] | None = None,
    flavor: SpaceHardware | None = None,
    timeout: int | float | str | None = None,
    labels: dict[str, str] | None = None,
    volumes: list[Volume] | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> ScheduledJobInfo:
    """
    Create scheduled compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.
        command (`list[str]`):
            The command to run. Example: `["echo", "hello"]`.
        schedule (`str`):
            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
        suspend (`bool`, *optional*):
            If True, the scheduled Job is suspended (paused). Defaults to False.
        concurrency (`bool`, *optional*):
            If True, multiple instances of this Job can run concurrently. Defaults to False.
        env (`dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.
        secrets (`dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.
        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.
        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.
        labels (`dict[str, str]`, *optional*):
            Labels to attach to the job (key-value pairs).
        volumes (`list[Volume]`, *optional*):
            Hugging Face Buckets or Repos to mount as volumes in the job container.
            Each volume is a [`Volume`] with `type` (`"bucket"`, `"model"`, `"dataset"`, or `"space"`),
            `source` (e.g. `"username/my-bucket"`), and `mount_path` (e.g. `"/data"`).
        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`ScheduledJobInfo`]: The scheduled Job description built from the Hub response.

    Example:
        Create your first scheduled Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly")
        ```

        Use a CRON schedule expression:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('this runs every 5min')"], schedule="*/5 * * * *")
        ```

        Create a scheduled GPU Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> create_scheduled_job(image=image, command=command, flavor="a10g-small", schedule="@hourly")
        ```
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]

    # Payload sent to the HF Jobs API: the job spec plus its schedule.
    request_body: dict[str, Any] = {
        "jobSpec": _create_job_spec(
            image=image,
            command=command,
            env=env,
            secrets=secrets,
            flavor=flavor,
            timeout=timeout,
            labels=labels,
            volumes=volumes,
        ),
        "schedule": schedule,
    }
    # Optional flags are only included when the caller set them explicitly.
    for flag_name, flag_value in (("concurrency", concurrency), ("suspend", suspend)):
        if flag_value is not None:
            request_body[flag_name] = flag_value

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}",
        json=request_body,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    return ScheduledJobInfo(**response.json())
def list_scheduled_jobs(
    self,
    *,
    timeout: int | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> list[ScheduledJobInfo]:
    """
    List scheduled compute Jobs on Hugging Face infrastructure.

    Args:
        timeout (`int`, *optional*):
            Timeout forwarded as-is to the HTTP request sent to the Hub.
        namespace (`str`, *optional*):
            The namespace from where it lists the jobs. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `list[ScheduledJobInfo]`: One [`ScheduledJobInfo`] per entry returned by the Hub.
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/scheduled-jobs/{owner}",
        headers=self._build_hf_headers(token=token),
        timeout=timeout,
    )
    hf_raise_for_status(response)

    scheduled_jobs = []
    for entry in response.json():
        scheduled_jobs.append(ScheduledJobInfo(**entry))
    return scheduled_jobs
def inspect_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> ScheduledJobInfo:
    """
    Fetch the description of a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.
        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`ScheduledJobInfo`]: The scheduled Job description built from the Hub response.

    Example:
        ```python
        >>> from huggingface_hub import inspect_scheduled_job, create_scheduled_job
        >>> scheduled_job = create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly")
        >>> inspect_scheduled_job(scheduled_job_id=scheduled_job.id)
        ```
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    scheduled_job_payload = response.json()
    return ScheduledJobInfo(**scheduled_job_payload)
def delete_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Delete a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.
        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.delete(
        f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}",
        headers=self._build_hf_headers(token=token),
    )
    # Nothing is returned on success; an error status raises.
    hf_raise_for_status(response)
def suspend_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Suspend (pause) a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.
        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}/suspend",
        headers=self._build_hf_headers(token=token),
    )
    # Nothing is returned on success; an error status raises.
    response.raise_for_status()
def resume_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> None:
    """
    Resume (unpause) a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.
        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}/resume",
        headers=self._build_hf_headers(token=token),
    )
    # Nothing is returned on success; an error status raises.
    response.raise_for_status()
@experimental
def create_scheduled_uv_job(
    self,
    script: str,
    *,
    script_args: list[str] | None = None,
    schedule: str,
    suspend: bool | None = None,
    concurrency: bool | None = None,
    dependencies: list[str] | None = None,
    python: str | None = None,
    image: str | None = None,
    env: dict[str, Any] | None = None,
    secrets: dict[str, Any] | None = None,
    flavor: SpaceHardware | None = None,
    timeout: int | float | str | None = None,
    labels: dict[str, str] | None = None,
    volumes: list[Volume] | None = None,
    namespace: str | None = None,
    token: bool | str | None = None,
) -> ScheduledJobInfo:
    """
    Create a scheduled UV script Job on Hugging Face infrastructure.

    The `uv run` command, environment and secrets are prepared by `_create_uv_command_env_and_secrets`
    and the schedule itself is created through `create_scheduled_job`.

    Args:
        script (`str`):
            Path or URL of the UV script, or a command.
        script_args (`list[str]`, *optional*)
            Arguments to pass to the script, or a command.
        schedule (`str`):
            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
        suspend (`bool`, *optional*):
            If True, the scheduled Job is suspended (paused). Defaults to False.
        concurrency (`bool`, *optional*):
            If True, multiple instances of this Job can run concurrently. Defaults to False.
        dependencies (`list[str]`, *optional*)
            Dependencies to use to run the UV script.
        python (`str`, *optional*)
            Use a specific Python version. Default is 3.12.
        image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"):
            Use a custom Docker image with `uv` installed.
        env (`dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.
        secrets (`dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.
        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.
        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.
        labels (`dict[str, str]`, *optional*):
            Labels to attach to the job (key-value pairs).
        volumes (`list[Volume]`, *optional*):
            Hugging Face Buckets or Repos to mount as volumes in the job container.
            Each volume is a [`Volume`] with `type` (`"bucket"`, `"model"`, `"dataset"`, or `"space"`),
            `source` (e.g. `"username/my-bucket"`), and `mount_path` (e.g. `"/data"`).
        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`ScheduledJobInfo`]: The value returned by `create_scheduled_job` for the new schedule.

    Example:
        Schedule a script from a URL:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly")
        ```

        Schedule a local script:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "my_sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly")
        ```

        Schedule a command:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "lighteval"
        >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small", schedule="@weekly")
        ```
    """
    if not image:
        image = "ghcr.io/astral-sh/uv:python3.12-bookworm"

    # Turn the script (or command) and its arguments into a container command plus its env/secrets.
    job_command, job_env, job_secrets = self._create_uv_command_env_and_secrets(
        script=script,
        script_args=script_args,
        dependencies=dependencies,
        python=python,
        env=env,
        secrets=secrets,
        namespace=namespace,
        token=token,
    )

    # Scheduling options and hardware settings are forwarded untouched.
    return self.create_scheduled_job(
        image=image,
        command=job_command,
        schedule=schedule,
        suspend=suspend,
        concurrency=concurrency,
        env=job_env,
        secrets=job_secrets,
        flavor=flavor,
        timeout=timeout,
        labels=labels,
        volumes=volumes,
        namespace=namespace,
        token=token,
    )
- def _create_uv_command_env_and_secrets(
- self,
- *,
- script: str,
- script_args: list[str] | None,
- dependencies: list[str] | None,
- python: str | None,
- env: dict[str, Any] | None,
- secrets: dict[str, Any] | None,
- namespace: str | None,
- token: bool | str | None,
- ) -> tuple[list[str], dict[str, Any], dict[str, Any]]:
- env = env or {}
- secrets = secrets or {}
- # Build command
- uv_args = []
- if dependencies:
- for dependency in dependencies:
- uv_args += ["--with", dependency]
- if python:
- uv_args += ["--python", python]
- script_args = script_args or []
- if namespace is None:
- namespace = self.whoami(token=token)["name"]
- # Find the local files to pass to the job
- local_files_to_include = {candidate for candidate in [script] + script_args if Path(candidate).is_file()}
- # Fail early for missing scripts or config files
- missing_local_files = {
- candidate
- for candidate in [script] + script_args
- if not Path(candidate).is_file()
- and Path(candidate).suffix in [".py", ".sh", ".yaml", ".yml", ".toml"]
- and not candidate.startswith("https://")
- and not candidate.startswith("http://")
- }
- if missing_local_files:
- raise FileNotFoundError(", ".join(missing_local_files))
- if len(local_files_to_include) == 0:
- # Direct URL execution or command - no upload needed
- command = ["uv", "run"] + uv_args + [script] + script_args
- else:
- # Find appropriate remote file names
- remote_to_local_file_names: dict[str, str] = {}
- for local_file_to_include in local_files_to_include:
- local_file_path = Path(local_file_to_include)
- # remove spaces for proper xargs parsing
- remote_file_path = Path(local_file_path.name.replace(" ", "_"))
- if remote_file_path.name in remote_to_local_file_names:
- for i in itertools.count():
- remote_file_name = remote_file_path.with_stem(remote_file_path.stem + f"({i})").name
- if remote_file_name not in remote_to_local_file_names:
- remote_to_local_file_names[remote_file_name] = local_file_to_include
- break
- else:
- remote_to_local_file_names[remote_file_path.name] = local_file_to_include
- local_to_remote_file_names = {
- local_file_to_include: remote_file_name
- for remote_file_name, local_file_to_include in remote_to_local_file_names.items()
- }
- # Replace local paths with remote paths in command
- if script in local_to_remote_file_names:
- script = local_to_remote_file_names[script]
- script_args = [
- local_to_remote_file_names[arg] if arg in local_to_remote_file_names else arg for arg in script_args
- ]
- # Load content to pass as environment variable with format
- # file1 base64content1
- # file2 base64content2
- # ...
- env["LOCAL_FILES_ENCODED"] = "\n".join(
- remote_file_name + " " + base64.b64encode(Path(local_file_to_include).read_bytes()).decode()
- for remote_file_name, local_file_to_include in remote_to_local_file_names.items()
- )
- # Shell-quote each arg to prevent metacharacters (e.g. '>') from being interpreted by bash
- quoted_parts = ["'" + arg.replace("'", r"'\''") + "'" for arg in [*uv_args, script, *script_args]]
- command = [
- "bash",
- "-c",
- """echo $LOCAL_FILES_ENCODED | xargs -n 2 bash -c 'echo "$1" | base64 -d > "$0"' && """
- + f"uv run {' '.join(quoted_parts)}",
- ]
- return command, env, secrets
@validate_hf_hub_args
def create_bucket(
    self,
    bucket_id: str,
    *,
    private: bool | None = None,
    resource_group_id: str | None = None,
    exist_ok: bool = False,
    token: bool | str | None = None,
) -> BucketUrl:
    """Create a bucket on the Hub.

    Args:
        bucket_id (`str`):
            A namespace (user or an organization) and a bucket name separated by a `/`.
            If no namespace is provided, the bucket will be created in the current user's namespace.
        private (`bool`, *optional*):
            Whether to make the bucket private. If `None` (default), the bucket will be public unless the
            organization's default is private.
        resource_group_id (`str`, *optional*):
            Resource group in which to create the bucket. Resource groups are only available for Enterprise Hub
            organizations and allow to define which members of the organization can access the resource. The ID
            of a resource group can be found in the URL of the resource's page on the Hub
            (e.g. `"66670e5163145ca562cb1988"`). To learn more about resource groups, see
            https://huggingface.co/docs/hub/en/security-resource-groups.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if the bucket already exists.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`BucketUrl`]: URL to the newly created bucket containing
        attributes like `endpoint`, `namespace`, and `bucket_id`.

    Raises:
        `ValueError`: If `bucket_id` contains a `/` and parses to a bucket ID followed by a prefix.

    Example:
        ```python
        >>> from huggingface_hub import create_bucket
        >>> url = create_bucket(bucket_id="my-bucket")
        >>> url.bucket_id
        'user/my-bucket'
        >>> url.url
        'https://huggingface.co/buckets/user/my-bucket'
        >>> url.handle
        'hf://buckets/user/my-bucket'

        >>> create_bucket(bucket_id="my-bucket", private=True, exist_ok=True)
        BucketUrl(...)
        ```
    """
    # Only send the options the caller actually set.
    optional_fields = (("private", private), ("resourceGroupId", resource_group_id))
    payload: dict[str, Any] = {key: value for key, value in optional_fields if value is not None}

    if "/" in bucket_id:
        bucket_id_parsed, prefix = _split_bucket_id_and_prefix(bucket_id)
        if prefix:
            raise ValueError(f"Invalid bucket ID: {bucket_id}")
        namespace, name = bucket_id_parsed.split("/")
    else:
        # "me" namespace refers to the current user
        namespace, name = "me", bucket_id

    response = get_session().post(
        f"{self.endpoint}/api/buckets/{namespace}/{name}",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )

    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as err:
        if not exist_ok:
            raise
        status_code = err.response.status_code
        if status_code == 403:
            # No write permission on the namespace but the bucket might already exist:
            # if it can be read, report it as existing, otherwise surface the original error.
            try:
                self.bucket_info(bucket_id=bucket_id, token=token)
                return BucketUrl(f"{self.endpoint}/buckets/{bucket_id}", endpoint=self.endpoint)
            except HfHubHTTPError:
                raise err
        if status_code != 409:
            raise
        # 409: the bucket already exists and `exist_ok=True`, fall through to the normal return.

    return BucketUrl(response.json()["url"], endpoint=self.endpoint)
@validate_hf_hub_args
def bucket_info(
    self,
    bucket_id: str,
    *,
    token: bool | str | None = None,
) -> BucketInfo:
    """Get information about a specific bucket on the Hub.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`BucketInfo`]: The bucket information.

    Raises:
        [`~errors.BucketNotFoundError`]: If the bucket cannot be found. This may be because it doesn't exist,
        or because it is set to `private` and you do not have access.

    Example:
        ```python
        >>> from huggingface_hub import bucket_info
        >>> info = bucket_info(bucket_id="Wauplin/first-bucket")
        >>> info.id
        'Wauplin/first-bucket'
        >>> info.private
        False
        >>> info.created_at
        datetime.datetime(2026, 2, 6, 17, 37, 57, tzinfo=datetime.timezone.utc)
        >>> info.size
        551879671
        >>> info.total_files
        12
        ```
    """
    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/buckets/{bucket_id}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    bucket_payload = response.json()
    return BucketInfo(**bucket_payload)
@validate_hf_hub_args
def list_buckets(
    self,
    namespace: str | None = None,
    *,
    token: bool | str | None = None,
) -> Iterable[BucketInfo]:
    """List buckets on the Hub under a certain namespace.

    Args:
        namespace (`str`, *optional*):
            List buckets under this namespace (user or organization). Defaults to listing user's buckets.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[BucketInfo]`: An iterable of [`BucketInfo`] objects.

    Example:
        ```python
        >>> from huggingface_hub import list_buckets
        >>> for bucket in list_buckets(): # lists buckets in the user's namespace
        ...     print(bucket)
        >>> for bucket in list_buckets(namespace="huggingface"): # lists buckets in the "huggingface" organization
        ...     print(bucket)
        ```
    """
    # Without an explicit namespace, the "me" namespace is requested.
    if namespace is None:
        namespace = "me"

    listing_url = f"{self.endpoint}/api/buckets/{namespace}"
    request_headers = self._build_hf_headers(token=token)
    # Items are yielded one by one as `paginate` produces them.
    for raw_bucket in paginate(listing_url, params={}, headers=request_headers):
        yield BucketInfo(**raw_bucket)
@validate_hf_hub_args
def delete_bucket(
    self,
    bucket_id: str,
    *,
    missing_ok: bool = False,
    token: bool | str | None = None,
) -> None:
    """Remove a bucket from the Hub.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        missing_ok (`bool`, *optional*, defaults to `False`):
            When `True`, a 404 response from the server is silently ignored.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`~errors.BucketNotFoundError`]: If the bucket cannot be found and `missing_ok` is `False` (default).

    Example:
    ```python
    >>> from huggingface_hub import delete_bucket
    >>> delete_bucket(bucket_id="Wauplin/first-bucket")
    >>> delete_bucket(bucket_id="Wauplin/first-bucket", missing_ok=True)
    ```
    """
    auth_headers = self._build_hf_headers(token=token)
    response = get_session().delete(f"{self.endpoint}/api/buckets/{bucket_id}", headers=auth_headers)

    # The cached xet connection info for this bucket is dropped whatever the outcome of the request.
    reset_xet_connection_info_cache_for_repo("bucket", bucket_id)

    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        # Only a 404 combined with `missing_ok=True` is swallowed; anything else propagates.
        tolerated = error.response.status_code == 404 and missing_ok
        if not tolerated:
            raise
@validate_hf_hub_args
def move_bucket(
    self,
    from_id: str,
    to_id: str,
    *,
    token: bool | str | None = None,
) -> None:
    """Rename a bucket or transfer it to another namespace.

    Note there are certain limitations. For more information about moving
    repositories, please see
    https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo.

    Args:
        from_id (`str`):
            Current bucket identifier, written as `namespace/name`
            (e.g. `"username/my-bucket"`).
        to_id (`str`):
            Target bucket identifier, written as `namespace/name`
            (e.g. `"username/new-bucket-name"` or `"organization/my-bucket"`).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `from_id` or `to_id` does not split into exactly two parts on `/`.
        [`~errors.BucketNotFoundError`]:
            If the source bucket cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.

    Example:
    ```python
    >>> from huggingface_hub import move_bucket

    >>> # Rename a bucket within the same namespace
    >>> move_bucket(from_id="username/old-name", to_id="username/new-name")

    >>> # Transfer a bucket to an organization
    >>> move_bucket(from_id="username/my-bucket", to_id="my-org/my-bucket")
    ```
    """
    # Both identifiers are validated locally, source first, before any request is sent.
    for candidate in (from_id, to_id):
        if len(candidate.split("/")) != 2:
            raise ValueError(f"Invalid repo_id: {candidate}. It should have a namespace (:namespace:/:repo_name:)")

    move_request = {"fromRepo": from_id, "toRepo": to_id, "type": "bucket"}
    auth_headers = self._build_hf_headers(token=token)
    response = get_session().post(f"{self.endpoint}/api/repos/move", headers=auth_headers, json=move_request)
    hf_raise_for_status(response)
@validate_hf_hub_args
def list_bucket_tree(
    self,
    bucket_id: str,
    prefix: str | None = None,
    *,
    recursive: bool | None = None,
    token: str | bool | None = None,
) -> Iterable[BucketFile | BucketFolder]:
    """Iterate over the files and folders stored in a bucket.

    This is a generator: no request is sent until the result is iterated.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        prefix (`str`, *optional*):
            Restrict the listing to entries whose path starts with this prefix. The prefix is
            fully percent-encoded (including `/`) and appended to the tree URL.
        recursive (`bool`, *optional*):
            If `True`, list files recursively. If `False`, list only the entries directly under the
            requested level. When left to `None`, the parameter is not sent to the server.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[Union[BucketFile, BucketFolder]]`: [`BucketFile`] objects for entries of type `"file"`
        and [`BucketFolder`] objects for entries of type `"directory"`. Entries of any other type are skipped.

    Example:
    ```python
    >>> from huggingface_hub import list_bucket_tree
    >>> for file_info in list_bucket_tree(bucket_id="username/my-bucket"):
    ...     print(file_info.path)

    >>> # Filter by prefix
    >>> for file_info in list_bucket_tree(bucket_id="username/my-bucket", prefix="models/"):
    ...     print(file_info.path)
    ```
    """
    if prefix:
        tree_suffix = "/" + quote(prefix, safe="")
    else:
        tree_suffix = ""

    # `recursive` is only forwarded when the caller set it explicitly.
    query = {}
    if recursive is not None:
        query["recursive"] = recursive

    entries = paginate(
        path=f"{self.endpoint}/api/buckets/{bucket_id}/tree{tree_suffix}",
        headers=self._build_hf_headers(token=token),
        params=query,
    )
    for entry in entries:
        entry_kind = entry["type"]
        if entry_kind == "file":
            yield BucketFile(**entry)
        elif entry_kind == "directory":
            yield BucketFolder(**entry)
@validate_hf_hub_args
def get_bucket_paths_info(
    self,
    bucket_id: str,
    paths: Iterable[str],
    *,
    token: str | bool | None = None,
) -> Iterable[BucketFile]:
    """Look up file entries of a bucket by path.

    Paths are sent to the server in batches of `_BUCKET_PATHS_INFO_BATCH_SIZE`, and the entries of
    each batch are yielded as soon as its response arrives. This is a generator: no request is sent
    until the result is iterated.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        paths (`Iterable[str]`):
            The paths to get information about. If a path does not exist, it is ignored without raising
            an exception. Only file paths are supported.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[BucketFile]`:
            One [`BucketFile`] per entry returned by the server.

    Example:
    ```py
    >>> from huggingface_hub import get_bucket_paths_info
    >>> paths_info = get_bucket_paths_info("username/my-bucket", ["file.txt", "checkpoints/model.safetensors"])
    >>> for info in paths_info:
    ...     print(info)
    BucketFile(type='file', path='file.txt', size=2379, xet_hash='96e637d9665bd35477b1908a23f2e254edfba0618dbd2d62f90a6baee7d139cf', mtime=datetime.datetime(2024, 9, 25, 15, 31, 2, 346000, tzinfo=datetime.timezone.utc))
    BucketFile(type='file', path='checkpoints/model.safetensors', size=2408828, xet_hash='3ed0e9fefe788ddd61d1e26eba67057e9740a064b009256fbafadf6bb95785ca', mtime=datetime.datetime(2024, 9, 25, 15, 31, 2, 346000, tzinfo=datetime.timezone.utc))
    ```
    """
    auth_headers = self._build_hf_headers(token=token)
    for path_batch in chunk_iterable(paths, chunk_size=_BUCKET_PATHS_INFO_BATCH_SIZE):
        batch_response = http_backoff(
            "POST",
            f"{self.endpoint}/api/buckets/{bucket_id}/paths-info",
            json={"paths": list(path_batch)},
            headers=auth_headers,
        )
        hf_raise_for_status(batch_response)
        yield from (BucketFile(**entry) for entry in batch_response.json())
@validate_hf_hub_args
def copy_files(self, source: str, destination: str, *, token: str | bool | None = None) -> None:
    """Copy files between locations on the Hub.

    Copy files from a bucket or repository (model, dataset, space) to a bucket. Both individual files and
    entire folders are supported.

    Currently, only bucket destinations are supported. Copying to a repository is not supported.

    When copying from a repository, `.gitattributes` files are automatically excluded since they are
    git-specific metadata and not relevant in a bucket context.

    The method works in three phases: (1) inspect the destination to decide whether it is a file or a
    directory, (2) enumerate the source and queue each file either as a copy-by-xet-hash operation or as a
    download to be re-uploaded, (3) send the queued operations to the destination bucket in chunks of
    `_BUCKET_BATCH_ADD_CHUNK_SIZE`, copies first and uploads second.

    Args:
        source (`str`):
            Source location as an `hf://` handle. Can be a bucket path (e.g. `"hf://buckets/my-bucket/path/to/file"`)
            or a repo path (e.g. `"hf://username/my-model/weights.bin"`, `"hf://datasets/username/my-dataset/data/"`).
        destination (`str`):
            Destination location as an `hf://` handle pointing to a bucket
            (e.g. `"hf://buckets/my-bucket/target/path"`).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If the destination is not a bucket or if the source/destination handles are invalid.

    Example:
    ```python
    >>> from huggingface_hub import copy_files

    # Copy a single file between buckets
    >>> copy_files("hf://buckets/my-bucket/data.bin", "hf://buckets/other-bucket/data.bin")

    # Copy a folder from a bucket to another bucket
    >>> copy_files("hf://buckets/my-bucket/models/", "hf://buckets/other-bucket/backup/")

    # Copy a file from a model repo to a bucket
    >>> copy_files("hf://username/my-model/model.safetensors", "hf://buckets/my-bucket/")

    # Copy an entire dataset to a bucket
    >>> copy_files("hf://datasets/username/my-dataset/", "hf://buckets/my-bucket/datasets/")
    ```
    """
    source_handle = _parse_hf_copy_handle(source)
    destination_handle = _parse_hf_copy_handle(destination)

    if isinstance(destination_handle, _RepoCopyHandle):
        raise ValueError("Bucket-to-repo and repo-to-repo copy are not supported. Destination must be a bucket.")

    # --- Phase 1: classify the destination (file vs. directory, existing vs. new) ---
    destination_bucket_id = destination_handle.bucket_id
    destination_path = destination_handle.path
    destination_is_directory = False
    destination_exists_as_directory = False
    if destination_path == "":
        # Bucket root always exists as a directory
        destination_is_directory = True
        destination_exists_as_directory = True
    else:
        # Check if destination matches an existing file
        dest_path_info = list(self.get_bucket_paths_info(destination_bucket_id, [destination_path], token=token))
        if dest_path_info:
            destination_is_directory = False
        else:
            # Check if destination is an existing "directory" (prefix with children)
            destination_exists_as_directory = any(
                self.list_bucket_tree(destination_bucket_id, prefix=destination_path, recursive=False, token=token)
            )
            # Treat as directory if it exists as one, or if the user signaled with trailing slash
            # NOTE(review): the trailing slash is tested on the raw `destination` string, not on
            # `destination_path` — presumably because the parsed path has it stripped; confirm.
            destination_is_directory = destination_exists_as_directory or destination.endswith("/")

    # Work queues filled in phase 2 and flushed in phase 3.
    all_adds: list[tuple[str, str]] = []
    all_copies: list[_BucketCopyFile] = []
    pending_downloads: list[tuple[str, str]] = []  # (file_path, target_path) for non-xet files to download

    def _resolve_target_path(src_file_path: str, src_root_path: str | None, is_single_file: bool) -> str:
        """Compute the destination path (inside the destination bucket) for one source file.

        `src_root_path` is the folder being copied (`None` when the whole source is copied);
        it is ignored when `is_single_file` is `True`.
        """
        basename = src_file_path.rsplit("/", 1)[-1]
        if is_single_file:
            # Single file: either drop it into the destination directory or use the
            # destination path itself as the new file name.
            if destination_path == "":
                return basename
            if destination_is_directory:
                return f"{destination_path.rstrip('/')}/{basename}"
            return destination_path

        # Folder copy: first express the file path relative to the copied folder.
        if src_root_path is None:
            rel_path = src_file_path
        elif src_file_path.startswith(src_root_path + "/"):
            rel_path = src_file_path[len(src_root_path) + 1 :]
        elif src_file_path == src_root_path:
            rel_path = src_file_path.rsplit("/", 1)[-1]
        else:
            raise ValueError(f"Unexpected source path while copying folder: '{src_file_path}'.")

        if rel_path == "":
            raise ValueError("Cannot copy an empty relative path.")

        # Match Unix `cp -r` behavior: when the destination already exists as a
        # directory, nest the source folder inside it (e.g. cp -r src dst → dst/src/...).
        # When the destination does not exist, use rename semantics (cp -r src new → new/...).
        if destination_exists_as_directory and src_root_path is not None:
            src_dir_basename = src_root_path.rsplit("/", 1)[-1]
            rel_path = f"{src_dir_basename}/{rel_path}"

        if destination_path == "":
            return rel_path
        return f"{destination_path.rstrip('/')}/{rel_path}"

    def _build_copy_op(
        target_path: str, xet_hash: str, size: int, source_repo_type: str, source_repo_id: str
    ) -> _BucketCopyFile:
        """Server-side copy by xet hash — no data transfer needed."""
        return _BucketCopyFile(
            destination=target_path,
            xet_hash=xet_hash,
            source_repo_type=source_repo_type,
            source_repo_id=source_repo_id,
            size=size,
        )

    def _add_repo_file(file: RepoFile, target_path: str) -> None:
        """Queue a repo file: copy-by-hash if xet-backed, otherwise download first."""
        if file.xet_hash is not None:
            all_copies.append(
                _build_copy_op(
                    target_path,
                    file.xet_hash,
                    file.size,
                    source_handle.repo_type,  # type: ignore
                    source_handle.repo_id,  # type: ignore
                )
            )
        else:
            pending_downloads.append((file.path, target_path))

    # --- Phase 2: enumerate the source and fill the work queues ---
    # === Source is a bucket: always hash-based copy (no download needed) ===
    if isinstance(source_handle, _BucketCopyHandle):
        source_path = source_handle.path
        # NOTE(review): unlike the repo branch below, this lookup is also issued when
        # `source_path` is empty — confirm the endpoint tolerates an empty path.
        source_path_info = list(self.get_bucket_paths_info(source_handle.bucket_id, [source_path], token=token))
        if source_path_info:
            # Source path matched a single file
            source_file = source_path_info[0]
            target_path = _resolve_target_path(source_file.path, None, is_single_file=True)
            all_copies.append(
                _build_copy_op(
                    target_path, source_file.xet_hash, source_file.size, "bucket", source_handle.bucket_id
                )
            )
        else:
            # Source path is a folder (or prefix) — list and copy all matching files
            for item in self.list_bucket_tree(
                source_handle.bucket_id, prefix=source_path or None, recursive=True, token=token
            ):
                if not isinstance(item, BucketFile):
                    continue
                # Keep only the exact path or true children of the folder, so that a sibling
                # sharing the same string prefix (e.g. "models-old/" for "models") is not copied.
                if source_path and not (item.path == source_path or item.path.startswith(source_path + "/")):
                    continue
                target_path = _resolve_target_path(item.path, source_path or None, is_single_file=False)
                all_copies.append(
                    _build_copy_op(target_path, item.xet_hash, item.size, "bucket", source_handle.bucket_id)
                )
    # === Source is a repo: copy-by-hash if xet-backed, download otherwise ===
    else:
        source_path = source_handle.path
        source_repo_path_info: list[RepoFile | RepoFolder] = []
        # An empty path means "the whole repo", so there is no single path to look up.
        if source_path != "":
            source_repo_path_info = self.get_paths_info(
                repo_id=source_handle.repo_id,
                paths=[source_path],
                repo_type=source_handle.repo_type,
                revision=source_handle.revision,
                token=token,
            )

        if len(source_repo_path_info) == 1 and isinstance(source_repo_path_info[0], RepoFile):
            # Source path matched a single file — skip .gitattributes (git-specific metadata)
            if source_repo_path_info[0].path.rsplit("/", 1)[-1] == ".gitattributes":
                return
            target_path = _resolve_target_path(source_repo_path_info[0].path, None, is_single_file=True)
            _add_repo_file(source_repo_path_info[0], target_path)
        else:
            # Source path is a folder — list and copy all files recursively
            for repo_item in self.list_repo_tree(
                repo_id=source_handle.repo_id,
                path_in_repo=source_path,
                recursive=True,
                repo_type=source_handle.repo_type,
                revision=source_handle.revision,
                token=token,
            ):
                if not isinstance(repo_item, RepoFile):
                    continue
                # Skip .gitattributes files (git-specific metadata, not relevant in a bucket)
                if repo_item.path.rsplit("/", 1)[-1] == ".gitattributes":
                    continue
                target_path = _resolve_target_path(repo_item.path, source_path or None, is_single_file=False)
                _add_repo_file(repo_item, target_path)

    # Download non-xet files in parallel
    # (`pending_downloads` is only ever filled by the repo branch above.)
    if pending_downloads:

        def _download_and_collect(item: tuple[str, str]) -> None:
            """Download one repo file and queue the downloaded file for upload."""
            file_path, target_path = item
            local_path = self.hf_hub_download(
                repo_id=source_handle.repo_id,  # type: ignore
                repo_type=source_handle.repo_type,  # type: ignore
                filename=file_path,
                revision=source_handle.revision,  # type: ignore
                token=token,
                tqdm_class=silent_tqdm,  # type: ignore
            )
            all_adds.append((local_path, target_path))

        # NOTE(review): entries are appended from the mapped callback, so the order of
        # `all_adds` may differ from `pending_downloads` — confirm this is acceptable.
        thread_map(_download_and_collect, pending_downloads, desc="Downloading text files for copy")

    # --- Phase 3: flush the queues to the destination bucket ---
    # Send copies first (no upload needed), then adds (may need upload)
    if all_copies:
        for copy_chunk in chunk_iterable(all_copies, chunk_size=_BUCKET_BATCH_ADD_CHUNK_SIZE):
            self._batch_bucket_files(destination_bucket_id, copy=list(copy_chunk), token=token)

    if all_adds:
        for add_chunk in chunk_iterable(all_adds, chunk_size=_BUCKET_BATCH_ADD_CHUNK_SIZE):
            self._batch_bucket_files(destination_bucket_id, add=list(add_chunk), token=token)
@validate_hf_hub_args
def batch_bucket_files(
    self,
    bucket_id: str,
    *,
    add: list[tuple[str | Path | bytes, str]] | None = None,
    copy: list[tuple[str, str, str, str]] | None = None,
    delete: list[str] | None = None,
    token: str | bool | None = None,
):
    """Upload, copy and/or delete files of a bucket.

    The operation is not transactional: if an error occurs part-way, some files may already have been
    uploaded, copied or deleted while others have not.

    When the total number of operations does not exceed `_BUCKET_BATCH_ADD_CHUNK_SIZE`, everything is sent
    in a single batch. Otherwise the work is split into chunks and processed as copies first, then
    uploads, then deletions.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        add (`list` of `tuple`, *optional*):
            Files to upload. Each element is a `(source, destination)` tuple where `source` is a path to a
            local file (`str` or `Path`) or raw `bytes` content, and `destination` is the path in the bucket.
        copy (`list` of `tuple`, *optional*):
            Files to copy by xet hash. Each element is a `(source_repo_type, source_repo_id, xet_hash,
            destination)` tuple where:
            - `source_repo_type` is the type of the source repository: `"model"`, `"dataset"`, `"space"`, or
              `"bucket"`.
            - `source_repo_id` is the ID of the source repository or bucket (e.g. `"username/my-model"`).
            - `xet_hash` is the xet hash of the file to copy.
            - `destination` is the destination path in the bucket.
            This is a server-side operation — no data is downloaded or re-uploaded.
        delete (`list` of `str`, *optional*):
            Paths of files to delete from the bucket.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:
    ```python
    >>> from huggingface_hub import batch_bucket_files

    # Upload files
    >>> batch_bucket_files(
    ...     "username/my-bucket",
    ...     add=[
    ...         ("./model.safetensors", "models/model.safetensors"),
    ...         (b'{"key": "value"}', "config.json"),
    ...     ],
    ... )

    # Copy xet files from another bucket or repo (server-side, no data transfer)
    >>> batch_bucket_files(
    ...     "username/my-bucket",
    ...     copy=[
    ...         ("bucket", "username/source-bucket", "<xethash_1>", "models/model.safetensors"),
    ...         ("model", "username/my-model", "<xethash_2>", "models/config.safetensors"),
    ...     ],
    ... )

    # Delete files
    >>> batch_bucket_files("username/my-bucket", delete=["old-model.bin"])

    # Upload and delete in one batch
    >>> batch_bucket_files(
    ...     "username/my-bucket",
    ...     add=[("./new.txt", "new.txt")],
    ...     delete=["old.txt"],
    ... )
    ```
    """
    uploads = add or []
    copies = copy or []
    removals = delete or []

    total_operations = len(uploads) + len(copies) + len(removals)
    if total_operations > _BUCKET_BATCH_ADD_CHUNK_SIZE:
        # Large batch: chunk copies first (no upload), then adds, then deletes
        from .utils._xet_progress_reporting import XetProgressReporter

        # One reporter is shared by every upload chunk so progress spans the whole batch.
        reporter = None
        if uploads and not are_progress_bars_disabled():
            reporter = XetProgressReporter(total_files=len(uploads))

        try:
            for copy_chunk in chunk_iterable(copies, chunk_size=_BUCKET_BATCH_ADD_CHUNK_SIZE):
                self._batch_bucket_files(bucket_id, copy=list(copy_chunk), token=token)
            for upload_chunk in chunk_iterable(uploads, chunk_size=_BUCKET_BATCH_ADD_CHUNK_SIZE):
                self._batch_bucket_files(bucket_id, add=list(upload_chunk), token=token, _progress=reporter)
            for removal_chunk in chunk_iterable(removals, chunk_size=_BUCKET_BATCH_DELETE_CHUNK_SIZE):
                self._batch_bucket_files(bucket_id, delete=list(removal_chunk), token=token)
        finally:
            if reporter is not None:
                reporter.close(False)
    else:
        # Small batch: do everything in one call
        self._batch_bucket_files(bucket_id, add=uploads, copy=copies, delete=removals, token=token)  # type: ignore
def _batch_bucket_files(
    self,
    bucket_id: str,
    *,
    add: list[tuple[str | Path | bytes, str] | _BucketAddFile] | None = None,
    copy: list[tuple[str, str, str, str] | _BucketCopyFile] | None = None,
    delete: list[str | _BucketDeleteFile] | None = None,
    token: str | bool | None = None,
    _progress: XetProgressReporter | None = None,
):
    """Internal method: process a single batch of bucket file operations (upload to XET + call /batch).

    Steps:
        1. Normalize `add` / `copy` / `delete` inputs into `_BucketAddFile`, `_BucketCopyFile` and
           `_BucketDeleteFile` objects (items that already are such objects are used as-is).
        2. If at least one add operation has no `xet_hash` yet, fetch write connection info for the bucket
           and upload the pending path-based and bytes-based sources, storing the resulting hash and size
           on each operation.
        3. Serialize every operation as one JSON document per line and POST the payload to the bucket's
           `/batch` endpoint.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        add (`list`, *optional*):
            `(source, destination)` tuples or `_BucketAddFile` objects.
        copy (`list`, *optional*):
            `(source_repo_type, source_repo_id, xet_hash, destination)` tuples or `_BucketCopyFile` objects.
        delete (`list`, *optional*):
            Paths (`str`) or `_BucketDeleteFile` objects.
        token (`bool` or `str`, *optional*):
            Token forwarded to `_build_hf_headers`.
        _progress (`XetProgressReporter`, *optional*):
            Progress reporter owned by the caller. When provided it is used for upload progress and is
            NOT closed here; when omitted, a reporter is created (unless progress bars are disabled) and
            closed by this method.

    Raises:
        `XetAuthorizationError`: If fetching the xet write connection info fails with a 401.
    """
    # Convert public API inputs to internal operation objects
    operations: list[_BucketAddFile | _BucketCopyFile | _BucketDeleteFile] = []
    if add:
        for add_item in add:
            if isinstance(add_item, _BucketAddFile):
                operations.append(add_item)
            else:
                source, destination = add_item
                operations.append(_BucketAddFile(source=source, destination=destination))
    if copy:
        for copy_item in copy:
            if isinstance(copy_item, _BucketCopyFile):
                operations.append(copy_item)
            else:
                source_repo_type, source_repo_id, xet_hash, destination = copy_item
                operations.append(
                    _BucketCopyFile(
                        destination=destination,
                        xet_hash=xet_hash,
                        source_repo_type=source_repo_type,
                        source_repo_id=source_repo_id,
                    )
                )
    if delete:
        for delete_item in delete:
            if isinstance(delete_item, _BucketDeleteFile):
                operations.append(delete_item)
            else:
                operations.append(_BucketDeleteFile(path=delete_item))

    # Nothing to do: return before importing hf_xet or building headers.
    if not operations:
        return

    from hf_xet import upload_bytes, upload_files

    from .utils._xet_progress_reporting import XetProgressReporter

    headers = self._build_hf_headers(token=token)

    # Partition add operations: those still lacking a xet hash need an upload, and
    # bytes sources and path sources go through different hf_xet entry points.
    add_operations = [op for op in operations if isinstance(op, _BucketAddFile)]
    add_operations_to_upload = [op for op in add_operations if op.xet_hash is None]
    add_bytes_operations = [op for op in add_operations if isinstance(op.source, bytes)]
    add_path_operations = [op for op in add_operations if not isinstance(op.source, bytes)]

    if len(add_operations_to_upload) > 0:
        try:
            xet_connection_info = fetch_xet_connection_info_from_repo_info(
                token_type=XetTokenType.WRITE,
                repo_id=bucket_id,
                repo_type="bucket",
                headers=headers,
                endpoint=self.endpoint,
            )
        except HfHubHTTPError as e:
            # A 401 is re-raised as a dedicated error with an actionable message; other errors propagate.
            if e.response.status_code == 401:
                raise XetAuthorizationError(
                    f"You are unauthorized to upload to xet storage for bucket/{bucket_id}. "
                    f"Please check that you have configured your access token with write access to the repo."
                ) from e
            raise

        xet_endpoint = xet_connection_info.endpoint
        access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)

        def token_refresher() -> tuple[str, int]:
            """Fetch fresh write connection info and return `(access_token, expiration_unix_epoch)`."""
            new_xet_connection = fetch_xet_connection_info_from_repo_info(
                token_type=XetTokenType.WRITE,
                repo_id=bucket_id,
                repo_type="bucket",
                headers=headers,
                endpoint=self.endpoint,
            )
            if new_xet_connection is None:
                raise XetRefreshTokenError("Failed to refresh xet token")
            return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch

        # A reporter passed in by the caller is reused and left open; one created here is closed here.
        owns_progress = _progress is None
        if _progress is not None:
            progress = _progress
            progress_callback = progress.update_progress
        elif not are_progress_bars_disabled():
            progress = XetProgressReporter()
            progress_callback = progress.update_progress
        else:
            progress, progress_callback = None, None

        try:
            # 2.a. Upload path files
            # The same `xet_hash is None` filter is applied to the uploaded list and to the zip below,
            # so results are paired with their operations by position.
            xet_upload_infos = upload_files(
                [str(op.source) for op in add_path_operations if op.xet_hash is None],
                xet_endpoint,
                access_token_info,
                token_refresher,
                progress_callback,
                "bucket",
                skip_sha256=True,
            )
            for upload_info, op in zip(
                xet_upload_infos, [op for op in add_path_operations if op.xet_hash is None]
            ):
                op.xet_hash = upload_info.hash
                op.size = upload_info.filesize
            # NOTE(review): nesting reconstructed from a flattened source — assumed to run once per
            # upload call, not once per file; confirm against the original file.
            if progress is not None:
                progress.notify_upload_complete()

            # 2.b. Upload bytes files
            xet_upload_infos = upload_bytes(
                [op.source for op in add_bytes_operations if op.xet_hash is None],
                xet_endpoint,
                access_token_info,
                token_refresher,
                progress_callback,
                "bucket",
                skip_sha256=True,
            )
            for upload_info, op in zip(
                xet_upload_infos, [op for op in add_bytes_operations if op.xet_hash is None]
            ):
                op.xet_hash = upload_info.hash
                op.size = upload_info.filesize
            # NOTE(review): same assumption as above regarding nesting.
            if progress is not None:
                progress.notify_upload_complete()
        finally:
            if owns_progress and progress is not None:
                progress.close(False)

    # 3. /batch call
    def _payload_as_ndjson() -> Iterable[bytes]:
        """Yield each operation as a JSON-encoded line followed by a newline."""
        for op in operations:
            if isinstance(op, _BucketAddFile):
                payload = {
                    "type": "addFile",
                    "path": op.destination,
                    "xetHash": op.xet_hash,
                    "mtime": op.mtime,
                }
                # contentType is only sent when set on the operation.
                if op.content_type is not None:
                    payload["contentType"] = op.content_type
            elif isinstance(op, _BucketCopyFile):
                payload = {
                    "type": "copyFile",
                    "path": op.destination,
                    "xetHash": op.xet_hash,
                    "sourceRepoType": op.source_repo_type,
                    "sourceRepoId": op.source_repo_id,
                }
            else:
                payload = {
                    "type": "deleteFile",
                    "path": op.path,
                }
            yield json.dumps(payload).encode()
            yield b"\n"

    # `**headers` comes last, so a Content-Type already present in the built headers takes precedence.
    headers = {
        "Content-Type": "application/x-ndjson",
        **headers,
    }
    data = b"".join(_payload_as_ndjson())
    response = http_backoff(
        "POST", f"{self.endpoint}/api/buckets/{bucket_id}/batch", headers=headers, content=data
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def get_bucket_file_metadata(
    self,
    bucket_id: str,
    remote_path: str,
    *,
    token: str | bool | None = None,
) -> BucketFileMetadata:
    """Retrieve the size and xet information of a single file in a bucket.

    The data is read from a `HEAD` request on the file's resolve URL; the file content is not downloaded.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        remote_path (`str`):
            The path of the file in the bucket. It is fully percent-encoded (including `/`) in the URL.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`BucketFileMetadata`]: The file metadata containing size and xet information.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If the xet file data cannot be parsed from the response, or if the response has no
            `Content-Length` header.

    Example:
    ```python
    >>> from huggingface_hub import get_bucket_file_metadata
    >>> metadata = get_bucket_file_metadata(
    ...     bucket_id="username/my-bucket",
    ...     remote_path="models/model.safetensors",
    ... )
    >>> metadata.size
    42000
    ```
    """
    resolve_url = f"{self.endpoint}/buckets/{bucket_id}/resolve/{quote(remote_path, safe='')}"
    head_response = _httpx_follow_relative_redirects_with_backoff(
        "HEAD",
        resolve_url,
        headers=self._build_hf_headers(token=token),
        retry_on_errors=True,
    )

    # The xet data is validated before the size, so its error wins when both are missing.
    xet_file_data = parse_xet_file_data_from_response(head_response)
    if xet_file_data is None:
        raise ValueError(f"Could not parse xet file data for '{remote_path}' in bucket '{bucket_id}'.")

    content_length = head_response.headers.get("Content-Length")
    if content_length is None:
        raise ValueError(f"Could not get size for '{remote_path}' in bucket '{bucket_id}'.")

    return BucketFileMetadata(size=int(content_length), xet_file_data=xet_file_data)
@validate_hf_hub_args
def download_bucket_files(
    self,
    bucket_id: str,
    files: list[tuple[str | BucketFile, str | Path]],
    *,
    raise_on_missing_files: bool = False,
    token: str | bool | None = None,
) -> None:
    """Download a set of files from a bucket to the local filesystem.

    `files` is a list of `(remote file, local file)` tuples where `remote file` is either the path of the
    file in the bucket or a [`BucketFile`] object, and `local file` is the destination path on the local
    filesystem.

    Remote files given as strings are resolved with a call to [`get_bucket_paths_info`]. Remote files
    given as [`BucketFile`] objects (e.g. obtained from [`list_bucket_tree`]) are used as-is, without
    that lookup. Zero-size files are created locally as empty files instead of being downloaded.

    Args:
        bucket_id (`str`):
            The ID of the bucket (e.g. `"username/my-bucket"`).
        files (`list[tuple[Union[str, BucketFile], Union[str, Path]]]`):
            Files to download as a list of tuple (source, destination). See description above for format details.
        raise_on_missing_files (`bool`, *optional*):
            If `True`, raise an [`EntryNotFoundError`] when a requested file does not exist in the bucket. If
            `False` (default), missing files are skipped with a warning.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`EntryNotFoundError`]: If a requested path is missing and `raise_on_missing_files` is `True`.
        `IsADirectoryError`: If the destination of a zero-size file already exists as a directory.

    Example:
    ```python
    >>> from huggingface_hub import download_bucket_files
    >>> download_bucket_files(
    ...     bucket_id="username/my-bucket",
    ...     files=[
    ...         ("models/model.safetensors", "./local/model.safetensors"),
    ...         ("config.json", "./local/config.json"),
    ...     ],
    ... )
    ```

    ```python
    >>> from huggingface_hub import download_bucket_files
    >>> parquet_files = [file for file in list_bucket_tree(bucket_id="username/my-bucket") if file.path.endswith(".parquet")]
    >>> download_bucket_files(
    ...     bucket_id="username/my-bucket",
    ...     files=[(file, f"./local/{file.path}") for file in parquet_files],
    ... )
    ```
    """
    from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]

    headers = self._build_hf_headers(token=token)

    if len(files) == 0:
        return

    # Resolve every string path to a BucketFile through the paths-info lookup.
    requested_paths = [remote for remote, _ in files if not isinstance(remote, BucketFile)]
    resolved_by_path: dict[str, BucketFile] = {}
    if requested_paths:
        for info in self.get_bucket_paths_info(bucket_id, requested_paths, token=token):
            resolved_by_path[info.path] = info

    # Report the paths the server did not return: raise or warn depending on the flag.
    not_found = [requested for requested in requested_paths if requested not in resolved_by_path]
    if not_found:
        if raise_on_missing_files:
            raise EntryNotFoundError(
                f"{len(not_found)} file(s) not found in bucket '{bucket_id}': {', '.join(not_found)}"
            )
        for missing in not_found:
            warnings.warn(f"File '{missing}' not found in bucket '{bucket_id}'. Skipping.")

    # Build one download descriptor per file that could be resolved.
    all_download_infos = []
    reference_file: BucketFile | None = None
    for remote_file, local_path in files:
        if isinstance(remote_file, BucketFile):
            bucket_file = remote_file
        elif remote_file in resolved_by_path:
            bucket_file = resolved_by_path[remote_file]
        else:
            # missing file, already reported above
            continue

        if reference_file is None:
            reference_file = bucket_file

        all_download_infos.append(
            PyXetDownloadInfo(
                destination_path=str(Path(local_path).absolute()),
                hash=bucket_file.xet_hash,
                file_size=bucket_file.size,
            )
        )

    if len(all_download_infos) == 0 or reference_file is None:
        return

    # Fetch Xet connection info (same for all files) using the first resolved file.
    metadata = self.get_bucket_file_metadata(bucket_id, reference_file.path, token=token)
    connection_info = refresh_xet_connection_info(file_data=metadata.xet_file_data, headers=headers)

    def token_refresher() -> tuple[str, int]:
        refreshed = refresh_xet_connection_info(file_data=metadata.xet_file_data, headers=headers)
        if refreshed is None:
            raise ValueError("Failed to refresh token using xet metadata.")
        return refreshed.access_token, refreshed.expiration_unix_epoch

    # Zero-size files are materialized locally and never handed to the xet library.
    to_download = []
    for download_info in all_download_infos:
        if download_info.file_size != 0:
            to_download.append(download_info)
            continue

        empty_target = Path(download_info.destination_path)
        if not empty_target.exists():
            # doesn't exist => create it (and its parent folders)
            empty_target.parent.mkdir(parents=True, exist_ok=True)
            empty_target.touch()
        elif empty_target.is_dir():
            raise IsADirectoryError(f"Expected file but found directory at '{empty_target}'")
        elif empty_target.stat().st_size != 0:
            # already exists with content => truncate it
            empty_target.write_bytes(b"")

    # If only zero-size files, nothing more to download
    if len(to_download) == 0:
        return

    total_bytes = sum(info.file_size for info in to_download)
    progress_cm = _get_progress_bar_context(
        desc="Downloading bucket files",
        log_level=logger.getEffectiveLevel(),
        total=total_bytes,
        initial=0,
        name="huggingface_hub.download_bucket_files",
    )

    with progress_cm as progress:

        def progress_updater(progress_bytes: float):
            progress.update(progress_bytes)

        # The same updater is registered once per file so all files feed a single progress bar.
        download_files(
            to_download,
            endpoint=connection_info.endpoint,
            token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
            token_refresher=token_refresher,
            progress_updater=[progress_updater] * len(to_download),
        )
@validate_hf_hub_args
def sync_bucket(
    self,
    source: str | None = None,
    dest: str | None = None,
    *,
    delete: bool = False,
    ignore_times: bool = False,
    ignore_sizes: bool = False,
    existing: bool = False,
    ignore_existing: bool = False,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    filter_from: str | None = None,
    plan: str | None = None,
    apply: str | None = None,
    dry_run: bool = False,
    verbose: bool = False,
    quiet: bool = False,
    token: bool | str | None = None,
) -> SyncPlan:
    """Synchronize files between a local directory and a bucket.

    Programmatic counterpart of the ``hf buckets sync`` CLI command. One of ``source`` / ``dest`` must be a
    bucket path (``hf://buckets/...``) and the other one must be a local directory path.

    Args:
        source (`str`, *optional*):
            Where files are read from: a local directory or ``hf://buckets/namespace/bucket_name(/prefix)``.
            Required unless ``apply`` is used.
        dest (`str`, *optional*):
            Where files are written to: a local directory or ``hf://buckets/namespace/bucket_name(/prefix)``.
            Required unless ``apply`` is used.
        delete (`bool`, *optional*, defaults to `False`):
            Remove files from the destination when they are not present in the source.
        ignore_times (`bool`, *optional*, defaults to `False`):
            Decide whether to skip a file from its size only; modification times are ignored.
        ignore_sizes (`bool`, *optional*, defaults to `False`):
            Decide whether to skip a file from its modification time only; sizes are ignored.
        existing (`bool`, *optional*, defaults to `False`):
            Only update files that already exist on the receiver; do not create new ones.
        ignore_existing (`bool`, *optional*, defaults to `False`):
            Only create new files; do not update files that already exist on the receiver.
        include (`list[str]`, *optional*):
            fnmatch-style patterns of files to include.
        exclude (`list[str]`, *optional*):
            fnmatch-style patterns of files to exclude.
        filter_from (`str`, *optional*):
            Path to a filter file containing include/exclude rules.
        plan (`str`, *optional*):
            Write the sync plan to this JSONL file instead of executing it.
        apply (`str`, *optional*):
            Path to a previously saved plan file to apply. When set, ``source`` and ``dest`` are not needed.
        dry_run (`bool`, *optional*, defaults to `False`):
            Print the sync plan to stdout as JSONL without executing it.
        verbose (`bool`, *optional*, defaults to `False`):
            Show detailed per-file operations.
        quiet (`bool`, *optional*, defaults to `False`):
            Suppress all output and progress bars.
        token (Union[bool, str, None], optional):
            A valid user access token. If not provided, the locally saved token will be used.

    Returns:
        [`SyncPlan`]: The computed (or loaded) sync plan.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Upload local directory to bucket
    >>> api.sync_bucket("./data", "hf://buckets/username/my-bucket")

    # Download bucket to local directory
    >>> api.sync_bucket("hf://buckets/username/my-bucket", "./data")

    # Sync with delete and filtering
    >>> api.sync_bucket(
    ...     "./data",
    ...     "hf://buckets/username/my-bucket",
    ...     delete=True,
    ...     include=["*.safetensors"],
    ... )

    # Dry run: preview what would be synced
    >>> plan = api.sync_bucket("./data", "hf://buckets/username/my-bucket", dry_run=True)
    >>> plan.summary()
    {'uploads': 3, 'downloads': 0, 'deletes': 0, 'skips': 1, 'total_size': 4096}

    # Save plan for review, then apply
    >>> api.sync_bucket("./data", "hf://buckets/username/my-bucket", plan="sync-plan.jsonl")
    >>> api.sync_bucket(apply="sync-plan.jsonl")
    ```
    """
    # Pure delegation: every option is forwarded unchanged and this client is passed as `api`.
    return sync_bucket_internal(
        # Client and credentials
        api=self,
        token=token,
        # Endpoints of the sync
        source=source,
        dest=dest,
        # Comparison / transfer policy
        delete=delete,
        ignore_times=ignore_times,
        ignore_sizes=ignore_sizes,
        existing=existing,
        ignore_existing=ignore_existing,
        # File selection
        include=include,
        exclude=exclude,
        filter_from=filter_from,
        # Plan handling
        plan=plan,
        apply=apply,
        dry_run=dry_run,
        # Output verbosity
        verbose=verbose,
        quiet=quiet,
    )
def _parse_revision_from_pr_url(pr_url: str) -> str:
    """Turn a Pull Request URL into its `refs/pr/<num>` revision.

    The URL is matched against `_REGEX_DISCUSSION_URL`; the first captured group is used as the PR number.

    Example:
    ```py
    >>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2")
    "refs/pr/2"
    ```

    Raises:
        `RuntimeError`: If `pr_url` does not match the expected discussion URL pattern.
    """
    matched = re.match(_REGEX_DISCUSSION_URL, pr_url)
    if matched is not None:
        pr_number = matched[1]
        return f"refs/pr/{pr_number}"
    raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'")
def parse_local_safetensors_file_metadata(path: str | Path) -> SafetensorsFileMetadata:
    """
    Read the header of a safetensors file stored on disk and return its parsed metadata.

    See https://huggingface.co/docs/safetensors/index#format for a description of the safetensors format.

    Args:
        path (`str` or `Path`):
            Location of the safetensors file on the local filesystem.

    Returns:
        [`SafetensorsFileMetadata`]: metadata parsed from the file header.

    Raises:
        [`SafetensorsParsingError`]:
            If the header is truncated or otherwise cannot be parsed.
        `FileNotFoundError`:
            If no file exists at `path`.

    Example:
    ```py
    >>> metadata = parse_local_safetensors_file_metadata("path/to/model.safetensors")
    >>> metadata
    SafetensorsFileMetadata(
        metadata={'format': 'pt'},
        tensors={'layer.weight': TensorInfo(dtype='F32', shape=[512, 512], ...}, ...}
    )
    >>> metadata.parameter_count
    {'F32': 262144}
    ```
    """
    local_file = Path(path)
    name = local_file.name
    where = f"path '{local_file}'"

    with open(local_file, "rb") as stream:
        # The leading 8 bytes carry the metadata size; the shared helper parses and validates it.
        header_len = _get_safetensors_metadata_size(stream.read(8), name, where)
        # Read the metadata itself. A short read means the file ends before the declared size.
        raw_header = stream.read(header_len)

    received = len(raw_header)
    if received < header_len:
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{name}' ({where}): file is truncated. Expected "
            f"{header_len} bytes of metadata but got {received}."
        )

    # Decoding of the metadata bytes is delegated to the shared helper.
    return _parse_safetensors_header(raw_header, name, where)
def get_local_safetensors_metadata(path: str | Path) -> SafetensorsRepoMetadata:
    """
    Parse metadata for a local safetensors file or folder.

    Supports:
    - Single safetensors file (e.g., `model.safetensors`)
    - Directory with non-sharded model (contains `model.safetensors`)
    - Directory with sharded model (contains `model.safetensors.index.json`)

    When a directory contains both the single file and the index file, the single file takes precedence.

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Args:
        path (`str` or `Path`):
            Path to a safetensors file or directory containing safetensors files.

    Returns:
        [`SafetensorsRepoMetadata`]: information related to the safetensors repo.

    Raises:
        [`NotASafetensorsRepoError`]:
            If the path is not a valid safetensors file or folder (i.e., doesn't have either a
            `model.safetensors` or a `model.safetensors.index.json` file).
        [`SafetensorsParsingError`]:
            If a safetensors file header couldn't be parsed correctly.
        `FileNotFoundError`:
            If the path does not exist, or if a shard listed in the index file is missing.

    Example:
    ```py
    # Parse single safetensors file
    >>> metadata = get_local_safetensors_metadata("path/to/model.safetensors")
    >>> metadata
    SafetensorsRepoMetadata(metadata=None, sharded=False, weight_map={...}, files_metadata={...})

    # Parse directory with sharded model
    >>> metadata = get_local_safetensors_metadata("path/to/model_folder")
    >>> metadata
    SafetensorsRepoMetadata(metadata={'total_size': ...}, sharded=True, weight_map={...}, files_metadata={...})
    >>> len(metadata.files_metadata)
    3  # Number of safetensors shards
    ```
    """
    path = Path(path)

    # Case 1: Direct path to a safetensors file
    if path.is_file():
        file_metadata = parse_local_safetensors_file_metadata(path)
        return SafetensorsRepoMetadata(
            metadata=None,
            sharded=False,
            weight_map={tensor_name: path.name for tensor_name in file_metadata.tensors.keys()},
            files_metadata={path.name: file_metadata},
        )

    # Case 2: Directory
    if not path.is_dir():
        raise FileNotFoundError(f"Path '{path}' does not exist.")

    single_file_path = path / constants.SAFETENSORS_SINGLE_FILE
    index_file_path = path / constants.SAFETENSORS_INDEX_FILE

    # Case 2a: Non-sharded model (single model.safetensors file)
    if single_file_path.exists():
        file_metadata = parse_local_safetensors_file_metadata(single_file_path)
        return SafetensorsRepoMetadata(
            metadata=None,
            sharded=False,
            weight_map={
                tensor_name: constants.SAFETENSORS_SINGLE_FILE for tensor_name in file_metadata.tensors.keys()
            },
            files_metadata={constants.SAFETENSORS_SINGLE_FILE: file_metadata},
        )

    # Case 2b: Sharded model (model.safetensors.index.json)
    if index_file_path.exists():
        # Decode the index explicitly as UTF-8 rather than with the platform's default encoding, so that
        # non-ASCII tensor names are read the same way on every system.
        with open(index_file_path, encoding="utf-8") as f:
            index = json.load(f)
        weight_map = index.get("weight_map", {})

        # Parse metadata from each shard. Several tensors map to the same shard, so deduplicate first;
        # sorting keeps the key order of `files_metadata` stable from one run to the next.
        files_metadata = {}
        for shard_filename in sorted(set(weight_map.values())):
            shard_path = path / shard_filename
            files_metadata[shard_filename] = parse_local_safetensors_file_metadata(shard_path)

        return SafetensorsRepoMetadata(
            metadata=index.get("metadata", None),
            sharded=True,
            weight_map=weight_map,
            files_metadata=files_metadata,
        )

    # Not a valid safetensors folder
    raise NotASafetensorsRepoError(
        f"'{path}' is not a valid safetensors folder. Couldn't find '{constants.SAFETENSORS_INDEX_FILE}' or "
        f"'{constants.SAFETENSORS_SINGLE_FILE}' files."
    )
# Module-level default client. Each name below aliases an attribute of this single `HfApi` instance so it
# can be used as a plain module-level function (e.g. `whoami()`), without instantiating `HfApi` first.
api = HfApi()

whoami = api.whoami
auth_check = api.auth_check

list_models = api.list_models
model_info = api.model_info

list_datasets = api.list_datasets
list_dataset_parquet_files = api.list_dataset_parquet_files
dataset_info = api.dataset_info
get_dataset_leaderboard = api.get_dataset_leaderboard

list_spaces = api.list_spaces
search_spaces = api.search_spaces
space_info = api.space_info

kernel_info = api.kernel_info

list_papers = api.list_papers
paper_info = api.paper_info
read_paper = api.read_paper
list_daily_papers = api.list_daily_papers

repo_exists = api.repo_exists
revision_exists = api.revision_exists
file_exists = api.file_exists
repo_info = api.repo_info
list_repo_files = api.list_repo_files
list_repo_refs = api.list_repo_refs
list_repo_commits = api.list_repo_commits
list_repo_tree = api.list_repo_tree
get_paths_info = api.get_paths_info
verify_repo_checksums = api.verify_repo_checksums

get_model_tags = api.get_model_tags
get_dataset_tags = api.get_dataset_tags

create_commit = api.create_commit
create_repo = api.create_repo
delete_repo = api.delete_repo
update_repo_settings = api.update_repo_settings
move_repo = api.move_repo
upload_file = api.upload_file
upload_folder = api.upload_folder
delete_file = api.delete_file
delete_folder = api.delete_folder
delete_files = api.delete_files
upload_large_folder = api.upload_large_folder
preupload_lfs_files = api.preupload_lfs_files
create_branch = api.create_branch
delete_branch = api.delete_branch
create_tag = api.create_tag
delete_tag = api.delete_tag
get_full_repo_name = api.get_full_repo_name

# Danger-zone API
super_squash_history = api.super_squash_history
list_lfs_files = api.list_lfs_files
permanently_delete_lfs_files = api.permanently_delete_lfs_files

# Safetensors helpers
get_safetensors_metadata = api.get_safetensors_metadata
parse_safetensors_file_metadata = api.parse_safetensors_file_metadata

# Background jobs
run_as_future = api.run_as_future

# Activity API
list_liked_repos = api.list_liked_repos
list_repo_likers = api.list_repo_likers
unlike = api.unlike

# Community API
get_discussion_details = api.get_discussion_details
get_repo_discussions = api.get_repo_discussions
create_discussion = api.create_discussion
create_pull_request = api.create_pull_request
change_discussion_status = api.change_discussion_status
comment_discussion = api.comment_discussion
edit_discussion_comment = api.edit_discussion_comment
rename_discussion = api.rename_discussion
merge_pull_request = api.merge_pull_request

# Space API
add_space_secret = api.add_space_secret
delete_space_secret = api.delete_space_secret
get_space_variables = api.get_space_variables
add_space_variable = api.add_space_variable
delete_space_variable = api.delete_space_variable
get_space_runtime = api.get_space_runtime
request_space_hardware = api.request_space_hardware
set_space_sleep_time = api.set_space_sleep_time
pause_space = api.pause_space
restart_space = api.restart_space
duplicate_repo = api.duplicate_repo
duplicate_space = api.duplicate_space
request_space_storage = api.request_space_storage
delete_space_storage = api.delete_space_storage
set_space_volumes = api.set_space_volumes
delete_space_volumes = api.delete_space_volumes
enable_space_dev_mode = api.enable_space_dev_mode
disable_space_dev_mode = api.disable_space_dev_mode
fetch_space_logs = api.fetch_space_logs

# Inference Endpoint API
list_inference_endpoints = api.list_inference_endpoints
create_inference_endpoint = api.create_inference_endpoint
get_inference_endpoint = api.get_inference_endpoint
update_inference_endpoint = api.update_inference_endpoint
delete_inference_endpoint = api.delete_inference_endpoint
pause_inference_endpoint = api.pause_inference_endpoint
resume_inference_endpoint = api.resume_inference_endpoint
scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint
create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog
list_inference_catalog = api.list_inference_catalog

# Collections API
get_collection = api.get_collection
list_collections = api.list_collections
create_collection = api.create_collection
update_collection_metadata = api.update_collection_metadata
delete_collection = api.delete_collection
add_collection_item = api.add_collection_item
update_collection_item = api.update_collection_item
delete_collection_item = api.delete_collection_item

# Access requests API
list_pending_access_requests = api.list_pending_access_requests
list_accepted_access_requests = api.list_accepted_access_requests
list_rejected_access_requests = api.list_rejected_access_requests
cancel_access_request = api.cancel_access_request
accept_access_request = api.accept_access_request
reject_access_request = api.reject_access_request
grant_access = api.grant_access

# Webhooks API
create_webhook = api.create_webhook
disable_webhook = api.disable_webhook
delete_webhook = api.delete_webhook
enable_webhook = api.enable_webhook
get_webhook = api.get_webhook
list_webhooks = api.list_webhooks
update_webhook = api.update_webhook

# User API
get_user_overview = api.get_user_overview
get_organization_overview = api.get_organization_overview
list_organization_followers = api.list_organization_followers
list_organization_members = api.list_organization_members
list_user_followers = api.list_user_followers
list_user_following = api.list_user_following

# Jobs API
run_job = api.run_job
fetch_job_logs = api.fetch_job_logs
fetch_job_metrics = api.fetch_job_metrics
list_jobs = api.list_jobs
list_jobs_hardware = api.list_jobs_hardware
inspect_job = api.inspect_job
cancel_job = api.cancel_job
run_uv_job = api.run_uv_job
create_scheduled_job = api.create_scheduled_job
list_scheduled_jobs = api.list_scheduled_jobs
inspect_scheduled_job = api.inspect_scheduled_job
delete_scheduled_job = api.delete_scheduled_job
suspend_scheduled_job = api.suspend_scheduled_job
resume_scheduled_job = api.resume_scheduled_job
create_scheduled_uv_job = api.create_scheduled_uv_job

# Buckets API
create_bucket = api.create_bucket
bucket_info = api.bucket_info
list_buckets = api.list_buckets
delete_bucket = api.delete_bucket
move_bucket = api.move_bucket
list_bucket_tree = api.list_bucket_tree
get_bucket_paths_info = api.get_bucket_paths_info
copy_files = api.copy_files
batch_bucket_files = api.batch_bucket_files
get_bucket_file_metadata = api.get_bucket_file_metadata
download_bucket_files = api.download_bucket_files
sync_bucket = api.sync_bucket
|