| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691569256935694569556965697569856995700570157025703570457055706570757085709571057115712571357145715571657175718571957205721572257235724572557265727572857295730573157325733573457355736573757385739574057415742574357445745574657475748574957505751575257535754575557565757575857595760576157625763576457655766576757685769577057715772577357745775577657775778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766777677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188718971907191719271937194719571967197719871997200720172027203720472057206720772087209721072117212721372147215721672177218721972207221722272237224722572267227722872297230723172327233723472357236723772387239724072417242724372447245724672477248724972507251725272537254725572567257725872597260726172627263726472657266726772687269727072717272727372747275727672777278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049 |
- #
- # Author: Joris Vankerschaver 2013
- #
- import math
- import warnings
- import threading
- import types
- import numpy as np
- import scipy.linalg
- from scipy._lib import doccer
- from scipy.special import (gammaln, psi, multigammaln, xlogy, entr, betaln,
- ive, loggamma)
- from scipy import special
- import scipy._lib.array_api_extra as xpx
- from scipy._lib._util import check_random_state
- from scipy.linalg.blas import drot, get_blas_funcs
- from ._continuous_distns import norm, invgamma
- from ._discrete_distns import binom
- from . import _covariance, _rcont
- from ._qmvnt import _qmvt, _qmvn, _qauto
- from ._morestats import directional_stats
- from scipy.optimize import root_scalar
- __all__ = ['multivariate_normal',
- 'matrix_normal',
- 'dirichlet',
- 'dirichlet_multinomial',
- 'wishart',
- 'invwishart',
- 'multinomial',
- 'special_ortho_group',
- 'ortho_group',
- 'random_correlation',
- 'unitary_group',
- 'multivariate_t',
- 'multivariate_hypergeom',
- 'random_table',
- 'uniform_direction',
- 'vonmises_fisher',
- 'normal_inverse_gamma',
- 'matrix_t']
- _LOG_2PI = np.log(2 * np.pi)
- _LOG_2 = np.log(2)
- _LOG_PI = np.log(np.pi)
- MVN_LOCK = threading.Lock()
- _doc_random_state = """\
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- """
- def _squeeze_output(out):
- """
- Remove single-dimensional entries from array and convert to scalar,
- if necessary.
- """
- out = out.squeeze()
- if out.ndim == 0:
- out = out[()]
- return out
- def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
- """Determine which eigenvalues are "small" given the spectrum.
- This is for compatibility across various linear algebra functions
- that should agree about whether or not a Hermitian matrix is numerically
- singular and what is its numerical matrix rank.
- This is designed to be compatible with scipy.linalg.pinvh.
- Parameters
- ----------
- spectrum : 1d ndarray
- Array of eigenvalues of a Hermitian matrix.
- cond, rcond : float, optional
- Cutoff for small eigenvalues.
- Singular values smaller than rcond * largest_eigenvalue are
- considered zero.
- If None or -1, suitable machine precision is used.
- Returns
- -------
- eps : float
- Magnitude cutoff for numerical negligibility.
- """
- if rcond is not None:
- cond = rcond
- if cond in [None, -1]:
- t = spectrum.dtype.char.lower()
- factor = {'f': 1E3, 'd': 1E6}
- cond = factor[t] * np.finfo(t).eps
- eps = cond * np.max(abs(spectrum))
- return eps
- def _pinv_1d(v, eps=1e-5):
- """A helper function for computing the pseudoinverse.
- Parameters
- ----------
- v : iterable of numbers
- This may be thought of as a vector of eigenvalues or singular values.
- eps : float
- Values with magnitude no greater than eps are considered negligible.
- Returns
- -------
- v_pinv : 1d float ndarray
- A vector of pseudo-inverted numbers.
- """
- return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
- def _validate_marginal_input(dimensions, multivariate_dims):
- """Determine if input dimensions can be marginalized.
- Parameters
- ----------
- dimensions : float, ndarray
- Input dimensions to be marginalized
- multivariate_dims : int
- Number of dimensions of multivariate distribution.
- Returns
- -------
- dims : ndarray
- Array of indices to marginalize
- """
- dims = np.copy(dimensions)
- dims = np.atleast_1d(dims)
- if len(dims) == 0:
- msg = "Cannot marginalize all dimensions."
- raise ValueError(msg)
- if not np.issubdtype(dims.dtype, np.integer):
- msg = ("Elements of `dimensions` must be integers - the indices "
- "of the marginal variables being retained.")
- raise ValueError(msg)
- original_dims = np.copy(dims)
- dims[dims < 0] += multivariate_dims
- if len(np.unique(dims)) != len(dims):
- msg = "All elements of `dimensions` must be unique."
- raise ValueError(msg)
- i_invalid = (dims < 0) | (dims >= multivariate_dims)
- if np.any(i_invalid):
- msg = (f"Dimensions {original_dims[i_invalid]} are invalid "
- f"for a distribution in {multivariate_dims} dimensions.")
- raise ValueError(msg)
- return dims
- class _PSD:
- """
- Compute coordinated functions of a symmetric positive semidefinite matrix.
- This class addresses two issues. Firstly it allows the pseudoinverse,
- the logarithm of the pseudo-determinant, and the rank of the matrix
- to be computed using one call to eigh instead of three.
- Secondly it allows these functions to be computed in a way
- that gives mutually compatible results.
- All of the functions are computed with a common understanding as to
- which of the eigenvalues are to be considered negligibly small.
- The functions are designed to coordinate with scipy.linalg.pinvh()
- but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
- Parameters
- ----------
- M : array_like
- Symmetric positive semidefinite matrix (2-D).
- cond, rcond : float, optional
- Cutoff for small eigenvalues.
- Singular values smaller than rcond * largest_eigenvalue are
- considered zero.
- If None or -1, suitable machine precision is used.
- lower : bool, optional
- Whether the pertinent array data is taken from the lower
- or upper triangle of M. (Default: lower)
- check_finite : bool, optional
- Whether to check that the input matrices contain only finite
- numbers. Disabling may give a performance gain, but may result
- in problems (crashes, non-termination) if the inputs do contain
- infinities or NaNs.
- allow_singular : bool, optional
- Whether to allow a singular matrix. (Default: True)
- Notes
- -----
- The arguments are similar to those of scipy.linalg.pinvh().
- """
- def __init__(self, M, cond=None, rcond=None, lower=True,
- check_finite=True, allow_singular=True):
- self._M = np.asarray(M)
- # Compute the symmetric eigendecomposition.
- # Note that eigh takes care of array conversion, chkfinite,
- # and assertion that the matrix is square.
- s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
- eps = _eigvalsh_to_eps(s, cond, rcond)
- if np.min(s) < -eps:
- msg = "The input matrix must be symmetric positive semidefinite."
- raise ValueError(msg)
- d = s[s > eps]
- if len(d) < len(s) and not allow_singular:
- msg = ("When `allow_singular is False`, the input matrix must be "
- "symmetric positive definite.")
- raise np.linalg.LinAlgError(msg)
- s_pinv = _pinv_1d(s, eps)
- U = np.multiply(u, np.sqrt(s_pinv))
- # Save the eigenvector basis, and tolerance for testing support
- self.eps = 1e3*eps
- self.V = u[:, s <= eps]
- # Initialize the eagerly precomputed attributes.
- self.rank = len(d)
- self.U = U
- self.log_pdet = np.sum(np.log(d))
- # Initialize attributes to be lazily computed.
- self._pinv = None
- def _support_mask(self, x):
- """
- Check whether x lies in the support of the distribution.
- """
- residual = np.linalg.norm(x @ self.V, axis=-1)
- in_support = residual < self.eps
- return in_support
- @property
- def pinv(self):
- if self._pinv is None:
- self._pinv = np.dot(self.U, self.U.T)
- return self._pinv
- class multi_rv_generic:
- """
- Class which encapsulates common functionality between all multivariate
- distributions.
- """
- def __init__(self, seed=None):
- super().__init__()
- self._random_state = check_random_state(seed)
- @property
- def random_state(self):
- """ Get or set the Generator object for generating random variates.
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance then
- that instance is used.
- """
- return self._random_state
- @random_state.setter
- def random_state(self, seed):
- self._random_state = check_random_state(seed)
- def _get_random_state(self, random_state):
- if random_state is not None:
- return check_random_state(random_state)
- else:
- return self._random_state
- class multi_rv_frozen:
- """
- Class which encapsulates common functionality between all frozen
- multivariate distributions.
- """
- # generic type compatibility with scipy-stubs
- __class_getitem__ = classmethod(types.GenericAlias)
- @property
- def random_state(self):
- return self._dist._random_state
- @random_state.setter
- def random_state(self, seed):
- self._dist._random_state = check_random_state(seed)
- _mvn_doc_default_callparams = """\
- mean : array_like, default: ``[0]``
- Mean of the distribution.
- cov : array_like or `Covariance`, default: ``[1]``
- Symmetric positive (semi)definite covariance matrix of the distribution.
- allow_singular : bool, default: ``False``
- Whether to allow a singular covariance matrix. This is ignored if `cov` is
- a `Covariance` object.
- """
- _mvn_doc_callparams_note = """\
- Setting the parameter `mean` to `None` is equivalent to having `mean`
- be the zero-vector. The parameter `cov` can be a scalar, in which case
- the covariance matrix is the identity times that value, a vector of
- diagonal entries for the covariance matrix, a two-dimensional array_like,
- or a `Covariance` object.
- """
- _mvn_doc_frozen_callparams = ""
- _mvn_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- mvn_docdict_params = {
- '_mvn_doc_default_callparams': _mvn_doc_default_callparams,
- '_mvn_doc_callparams_note': _mvn_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- mvn_docdict_noparams = {
- '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,
- '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class multivariate_normal_gen(multi_rv_generic):
- r"""A multivariate normal random variable.
- The `mean` keyword specifies the mean. The `cov` keyword specifies the
- covariance matrix.
- Methods
- -------
- pdf(x, mean=None, cov=1, allow_singular=False)
- Probability density function.
- logpdf(x, mean=None, cov=1, allow_singular=False)
- Log of the probability density function.
- cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None)
- Cumulative distribution function.
- logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)
- Log of the cumulative distribution function.
- rvs(mean=None, cov=1, size=1, random_state=None)
- Draw random samples from a multivariate normal distribution.
- entropy(mean=None, cov=1)
- Compute the differential entropy of the multivariate normal.
- marginal(dimensions, mean=None, cov=1, allow_singular=False)
- Return a marginal multivariate normal distribution.
- fit(x, fix_mean=None, fix_cov=None)
- Fit a multivariate normal distribution to data.
- Parameters
- ----------
- %(_mvn_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- The covariance matrix `cov` may be an instance of a subclass of
- `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular`
- is ignored.
- Otherwise, `cov` must be a symmetric positive semidefinite
- matrix when `allow_singular` is True; it must be (strictly) positive
- definite when `allow_singular` is False.
- Symmetry is not checked; only the lower triangular portion is used.
- The determinant and inverse of `cov` are computed
- as the pseudo-determinant and pseudo-inverse, respectively, so
- that `cov` does not need to have full rank.
- The probability density function for `multivariate_normal` is
- .. math::
- f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}
- \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),
- where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
- :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`,
- SciPy extends this definition according to [1]_.
- .. versionadded:: 0.14.0
- References
- ----------
- .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia,
- https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case
- Examples
- --------
- >>> import numpy as np
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import multivariate_normal
- >>> x = np.linspace(0, 5, 10, endpoint=False)
- >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
- array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
- 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
- >>> fig1 = plt.figure()
- >>> ax = fig1.add_subplot(111)
- >>> ax.plot(x, y)
- >>> plt.show()
- Alternatively, the object may be called (as a function) to fix the mean
- and covariance parameters, returning a "frozen" multivariate normal
- random variable:
- >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
- >>> # Frozen object with the same methods but holding the given
- >>> # mean and covariance fixed.
- The input quantiles can be any shape of array, as long as the last
- axis labels the components. This allows us for instance to
- display the frozen pdf for a non-isotropic random variable in 2D as
- follows:
- >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
- >>> pos = np.dstack((x, y))
- >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
- >>> fig2 = plt.figure()
- >>> ax2 = fig2.add_subplot(111)
- >>> ax2.contourf(x, y, rv.pdf(pos))
- """ # noqa: E501
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)
- def __call__(self, mean=None, cov=1, allow_singular=False, seed=None, **kwds):
- """Create a frozen multivariate normal distribution.
- See `multivariate_normal_frozen` for more information.
- """
- return multivariate_normal_frozen(mean, cov,
- allow_singular=allow_singular,
- seed=seed, **kwds)
- def _process_parameters(self, mean, cov, allow_singular=True):
- """
- Infer dimensionality from mean or covariance matrix, ensure that
- mean and covariance are full vector resp. matrix.
- """
- if isinstance(cov, _covariance.Covariance):
- return self._process_parameters_Covariance(mean, cov)
- else:
- # Before `Covariance` classes were introduced,
- # `multivariate_normal` accepted plain arrays as `cov` and used the
- # following input validation. To avoid disturbing the behavior of
- # `multivariate_normal` when plain arrays are used, we use the
- # original input validation here.
- dim, mean, cov = self._process_parameters_psd(None, mean, cov)
- # After input validation, some methods then processed the arrays
- # with a `_PSD` object and used that to perform computation.
- # To avoid branching statements in each method depending on whether
- # `cov` is an array or `Covariance` object, we always process the
- # array with `_PSD`, and then use wrapper that satisfies the
- # `Covariance` interface, `CovViaPSD`.
- psd = _PSD(cov, allow_singular=allow_singular)
- cov_object = _covariance.CovViaPSD(psd)
- return dim, mean, cov_object
- def _process_parameters_Covariance(self, mean, cov):
- dim = cov.shape[-1]
- mean = np.array([0.]) if mean is None else mean
- message = (f"`cov` represents a covariance matrix in {dim} dimensions,"
- f"and so `mean` must be broadcastable to shape {(dim,)}")
- try:
- mean = np.broadcast_to(mean, dim)
- except ValueError as e:
- raise ValueError(message) from e
- return dim, mean, cov
- def _process_parameters_psd(self, dim, mean, cov):
- # Try to infer dimensionality
- if dim is None:
- if mean is None:
- if cov is None:
- dim = 1
- else:
- cov = np.asarray(cov, dtype=float)
- if cov.ndim < 2:
- dim = 1
- else:
- dim = cov.shape[0]
- else:
- mean = np.asarray(mean, dtype=float)
- dim = mean.size
- else:
- if not np.isscalar(dim):
- raise ValueError("Dimension of random variable must be "
- "a scalar.")
- # Check input sizes and return full arrays for mean and cov if
- # necessary
- if mean is None:
- mean = np.zeros(dim)
- mean = np.asarray(mean, dtype=float)
- if cov is None:
- cov = 1.0
- cov = np.asarray(cov, dtype=float)
- if dim == 1:
- mean = mean.reshape(1)
- cov = cov.reshape(1, 1)
- if mean.ndim != 1 or mean.shape[0] != dim:
- raise ValueError(f"Array 'mean' must be a vector of length {dim}.")
- if cov.ndim == 0:
- cov = cov * np.eye(dim)
- elif cov.ndim == 1:
- cov = np.diag(cov)
- elif cov.ndim == 2 and cov.shape != (dim, dim):
- rows, cols = cov.shape
- if rows != cols:
- msg = ("Array 'cov' must be square if it is two dimensional,"
- f" but cov.shape = {str(cov.shape)}.")
- else:
- msg = (f"Dimension mismatch: array 'cov' is of shape {cov.shape}, "
- f"but 'mean' is a vector of length {len(mean)}.")
- raise ValueError(msg)
- elif cov.ndim > 2:
- raise ValueError(f"Array 'cov' must be at most two-dimensional, "
- f"but cov.ndim = {cov.ndim}")
- return dim, mean, cov
- def _process_quantiles(self, x, dim):
- """
- Adjust quantiles array so that last axis labels the components of
- each data point.
- """
- x = np.asarray(x, dtype=float)
- if x.ndim == 0:
- x = x[np.newaxis]
- elif x.ndim == 1:
- if dim == 1:
- x = x[:, np.newaxis]
- else:
- x = x[np.newaxis, :]
- return x
- def _logpdf(self, x, mean, cov_object):
- """Log of the multivariate normal probability density function.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the log of the probability
- density function
- mean : ndarray
- Mean of the distribution
- cov_object : Covariance
- An object representing the Covariance matrix
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- log_det_cov, rank = cov_object.log_pdet, cov_object.rank
- dev = x - mean
- if dev.ndim > 1:
- log_det_cov = log_det_cov[..., np.newaxis]
- rank = rank[..., np.newaxis]
- maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1)
- return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
- def logpdf(self, x, mean=None, cov=1, allow_singular=False):
- """Log of the multivariate normal probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_mvn_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray or scalar
- Log of the probability density function evaluated at `x`
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- """
- params = self._process_parameters(mean, cov, allow_singular)
- dim, mean, cov_object = params
- x = self._process_quantiles(x, dim)
- out = self._logpdf(x, mean, cov_object)
- if np.any(cov_object.rank < dim):
- out_of_bounds = ~cov_object._support_mask(x-mean)
- out[out_of_bounds] = -np.inf
- return _squeeze_output(out)
- def pdf(self, x, mean=None, cov=1, allow_singular=False):
- """Multivariate normal probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_mvn_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray or scalar
- Probability density function evaluated at `x`
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- """
- params = self._process_parameters(mean, cov, allow_singular)
- dim, mean, cov_object = params
- x = self._process_quantiles(x, dim)
- out = np.exp(self._logpdf(x, mean, cov_object))
- if np.any(cov_object.rank < dim):
- out_of_bounds = ~cov_object._support_mask(x-mean)
- out[out_of_bounds] = 0.0
- return _squeeze_output(out)
- def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit, rng):
- """Multivariate normal cumulative distribution function.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the cumulative distribution function.
- mean : ndarray
- Mean of the distribution
- cov : array_like
- Covariance matrix of the distribution
- maxpts : integer
- The maximum number of points to use for integration
- abseps : float
- Absolute error tolerance
- releps : float
- Relative error tolerance
- lower_limit : array_like, optional
- Lower limit of integration of the cumulative distribution function.
- Default is negative infinity. Must be broadcastable with `x`.
- rng : Generator
- an instance of ``np.random.Generator``, which is used internally
- for QMC integration.
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'cdf' instead.
- .. versionadded:: 1.0.0
- """
- lower = (np.full(mean.shape, -np.inf)
- if lower_limit is None else lower_limit)
- # In 2d, _mvn.mvnun accepts input in which `lower` bound elements
- # are greater than `x`. Not so in other dimensions. Fix this by
- # ensuring that lower bounds are indeed lower when passed, then
- # set signs of resulting CDF manually.
- b, a = np.broadcast_arrays(x, lower)
- b, a = b - mean, a - mean # _qmvn only accepts zero mean
- i_swap = b < a
- signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
- a, b = a.copy(), b.copy()
- a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
- n = x.shape[-1]
- limits = np.concatenate((a, b), axis=-1)
- # qmvn expects 1-d arguments, so process points sequentially
- # XXX: if cov.ndim == 2 and limits.ndim == 1, can avoid apply_along_axis
- def func1d(limits):
- # res0 = _qmvn(maxpts, cov, limits[:n], limits[n:], rng)[0]
- res = _qauto(_qmvn, cov, limits[:n], limits[n:],
- rng, error=abseps, limit=maxpts, n_batches=10)
- return np.squeeze(res[0])
- out = np.apply_along_axis(func1d, -1, limits) * signs
- return _squeeze_output(out)
- def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
- abseps=1e-5, releps=1e-5, *, lower_limit=None, rng=None):
- """Log of the multivariate normal cumulative distribution function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_mvn_doc_default_callparams)s
- maxpts : integer, optional
- The maximum number of points to use for integration
- (default ``1000000*dim``)
- abseps : float, optional
- Absolute error tolerance (default 1e-5)
- releps : float, optional
- Relative error tolerance (default 1e-5)
- lower_limit : array_like, optional
- Lower limit of integration of the cumulative distribution function.
- Default is negative infinity. Must be broadcastable with `x`.
- rng : Generator, optional
- an instance of ``np.random.Generator``, which is used internally
- for QMC integration.
- Returns
- -------
- cdf : ndarray or scalar
- Log of the cumulative distribution function evaluated at `x`
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- .. versionadded:: 1.0.0
- """
- params = self._process_parameters(mean, cov, allow_singular)
- dim, mean, cov_object = params
- cov = cov_object.covariance
- x = self._process_quantiles(x, dim)
- if not maxpts:
- maxpts = 1000000 * dim
- rng = self._get_random_state(rng)
- cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit, rng)
- # the log of a negative real is complex, and cdf can be negative
- # if lower limit is greater than upper limit
- cdf = cdf + 0j if np.any(cdf < 0) else cdf
- out = np.log(cdf)
- return out
- def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
- abseps=1e-5, releps=1e-5, *, lower_limit=None, rng=None):
- """Multivariate normal cumulative distribution function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_mvn_doc_default_callparams)s
- maxpts : integer, optional
- The maximum number of points to use for integration
- (default ``1000000*dim``)
- abseps : float, optional
- Absolute error tolerance (default 1e-5)
- releps : float, optional
- Relative error tolerance (default 1e-5)
- lower_limit : array_like, optional
- Lower limit of integration of the cumulative distribution function.
- Default is negative infinity. Must be broadcastable with `x`.
- rng : Generator, optional
- an instance of ``np.random.Generator``, which is used internally
- for QMC integration.
- Returns
- -------
- cdf : ndarray or scalar
- Cumulative distribution function evaluated at `x`
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- .. versionadded:: 1.0.0
- """
- params = self._process_parameters(mean, cov, allow_singular)
- dim, mean, cov_object = params
- cov = cov_object.covariance
- x = self._process_quantiles(x, dim)
- if not maxpts:
- maxpts = 1000000 * dim
- rng = self._get_random_state(rng)
- out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit, rng)
- return out
- def rvs(self, mean=None, cov=1, size=1, random_state=None):
- """Draw random samples from a multivariate normal distribution.
- Parameters
- ----------
- %(_mvn_doc_default_callparams)s
- size : integer, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `N`), where `N` is the
- dimension of the random variable.
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- """
- dim, mean, cov_object = self._process_parameters(mean, cov)
- random_state = self._get_random_state(random_state)
- if isinstance(cov_object, _covariance.CovViaPSD):
- cov = cov_object.covariance
- out = random_state.multivariate_normal(mean, cov, size)
- out = _squeeze_output(out)
- else:
- size = size or tuple()
- if not np.iterable(size):
- size = (size,)
- shape = tuple(size) + (cov_object.shape[-1],)
- x = random_state.normal(size=shape)
- out = mean + cov_object.colorize(x)
- return out
- def entropy(self, mean=None, cov=1):
- """Compute the differential entropy of the multivariate normal.
- Parameters
- ----------
- %(_mvn_doc_default_callparams)s
- Returns
- -------
- h : scalar
- Entropy of the multivariate normal distribution
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- """
- dim, mean, cov_object = self._process_parameters(mean, cov)
- return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)
- def fit(self, x, fix_mean=None, fix_cov=None):
- """Fit a multivariate normal distribution to data.
- Parameters
- ----------
- x : ndarray (m, n)
- Data the distribution is fitted to. Must have two axes.
- The first axis of length `m` represents the number of vectors
- the distribution is fitted to. The second axis of length `n`
- determines the dimensionality of the fitted distribution.
- fix_mean : ndarray(n, )
- Fixed mean vector. Must have length `n`.
- fix_cov: ndarray (n, n)
- Fixed covariance matrix. Must have shape ``(n, n)``.
- Returns
- -------
- mean : ndarray (n, )
- Maximum likelihood estimate of the mean vector
- cov : ndarray (n, n)
- Maximum likelihood estimate of the covariance matrix
- """
- # input validation for data to be fitted
- x = np.asarray(x)
- if x.ndim != 2:
- raise ValueError("`x` must be two-dimensional.")
- n_vectors, dim = x.shape
- # parameter estimation
- # reference: https://home.ttic.edu/~shubhendu/Slides/Estimation.pdf
- if fix_mean is not None:
- # input validation for `fix_mean`
- fix_mean = np.atleast_1d(fix_mean)
- if fix_mean.shape != (dim, ):
- msg = ("`fix_mean` must be a one-dimensional array the same "
- "length as the dimensionality of the vectors `x`.")
- raise ValueError(msg)
- mean = fix_mean
- else:
- mean = x.mean(axis=0)
- if fix_cov is not None:
- # input validation for `fix_cov`
- fix_cov = np.atleast_2d(fix_cov)
- # validate shape
- if fix_cov.shape != (dim, dim):
- msg = ("`fix_cov` must be a two-dimensional square array "
- "of same side length as the dimensionality of the "
- "vectors `x`.")
- raise ValueError(msg)
- # validate positive semidefiniteness
- # a trimmed down copy from _PSD
- s, u = scipy.linalg.eigh(fix_cov, lower=True, check_finite=True)
- eps = _eigvalsh_to_eps(s)
- if np.min(s) < -eps:
- msg = "`fix_cov` must be symmetric positive semidefinite."
- raise ValueError(msg)
- cov = fix_cov
- else:
- centered_data = x - mean
- cov = centered_data.T @ centered_data / n_vectors
- return mean, cov
- def marginal(self, dimensions, mean=None, cov=1, allow_singular=False):
- """Return a marginal multivariate normal distribution.
- Parameters
- ----------
- dimensions : int or 1-d array_like
- The dimensions of the multivariate distribution corresponding
- with the marginal variables, that is, the indices of the dimensions
- that are being retained. The other dimensions are marginalized out.
- %(_mvn_doc_default_callparams)s
- Returns
- -------
- marginal_multivariate_normal : multivariate_normal_frozen
- An object representing the marginal distribution.
- Notes
- -----
- %(_mvn_doc_callparams_note)s
- """
- params = self._process_parameters(mean, cov, allow_singular)
- n, mean, cov_object = params
- dims = _validate_marginal_input(dimensions, n)
- mean = mean[dims]
- cov = cov_object.covariance[np.ix_(dims, dims)]
- return multivariate_normal_frozen(mean, cov, allow_singular)
- multivariate_normal = multivariate_normal_gen()
- class multivariate_normal_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
- maxpts=None, abseps=1e-5, releps=1e-5):
- """Create a frozen multivariate normal distribution.
- Parameters
- ----------
- mean : array_like, default: ``[0]``
- Mean of the distribution.
- cov : array_like, default: ``[1]``
- Symmetric positive (semi)definite covariance matrix of the
- distribution.
- allow_singular : bool, default: ``False``
- Whether to allow a singular covariance matrix.
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- maxpts : integer, optional
- The maximum number of points to use for integration of the
- cumulative distribution function (default ``1000000*dim``)
- abseps : float, optional
- Absolute error tolerance for the cumulative distribution function
- (default 1e-5)
- releps : float, optional
- Relative error tolerance for the cumulative distribution function
- (default 1e-5)
- Examples
- --------
- When called with the default parameters, this will create a 1D random
- variable with mean 0 and covariance 1:
- >>> from scipy.stats import multivariate_normal
- >>> r = multivariate_normal()
- >>> r.mean
- array([ 0.])
- >>> r.cov
- array([[1.]])
- """ # numpy/numpydoc#87 # noqa: E501
- self._dist = multivariate_normal_gen(seed)
- self.dim, self.mean, self.cov_object = (
- self._dist._process_parameters(mean, cov, allow_singular))
- self.allow_singular = allow_singular or self.cov_object._allow_singular
- if not maxpts:
- maxpts = 1000000 * self.dim
- self.maxpts = maxpts
- self.abseps = abseps
- self.releps = releps
- @property
- def cov(self):
- return self.cov_object.covariance
- def logpdf(self, x):
- x = self._dist._process_quantiles(x, self.dim)
- out = self._dist._logpdf(x, self.mean, self.cov_object)
- if np.any(self.cov_object.rank < self.dim):
- out_of_bounds = ~self.cov_object._support_mask(x-self.mean)
- out[out_of_bounds] = -np.inf
- return _squeeze_output(out)
- def pdf(self, x):
- return np.exp(self.logpdf(x))
- def logcdf(self, x, *, lower_limit=None, rng=None):
- cdf = self.cdf(x, lower_limit=lower_limit, rng=rng)
- # the log of a negative real is complex, and cdf can be negative
- # if lower limit is greater than upper limit
- cdf = cdf + 0j if np.any(cdf < 0) else cdf
- out = np.log(cdf)
- return out
- def cdf(self, x, *, lower_limit=None, rng=None):
- x = self._dist._process_quantiles(x, self.dim)
- rng = self._dist._get_random_state(rng)
- out = self._dist._cdf(x, self.mean, self.cov_object.covariance,
- self.maxpts, self.abseps, self.releps,
- lower_limit, rng)
- return _squeeze_output(out)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.mean, self.cov_object, size, random_state)
- def entropy(self):
- """Computes the differential entropy of the multivariate normal.
- Returns
- -------
- h : scalar
- Entropy of the multivariate normal distribution
- """
- log_pdet = self.cov_object.log_pdet
- rank = self.cov_object.rank
- return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
- def marginal(self, dimensions):
- return self._dist.marginal(dimensions, self.mean,
- self.cov_object, self.allow_singular)
- # Set frozen generator docstrings from corresponding docstrings in
- # multivariate_normal_gen and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:
- method = multivariate_normal_gen.__dict__[name]
- method_frozen = multivariate_normal_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(method.__doc__,
- mvn_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
- _matnorm_doc_default_callparams = """\
- mean : array_like, optional
- Mean of the distribution (default: `None`)
- rowcov : array_like, optional
- Among-row covariance matrix of the distribution (default: ``1``)
- colcov : array_like, optional
- Among-column covariance matrix of the distribution (default: ``1``)
- """
- _matnorm_doc_callparams_note = """\
- If `mean` is set to `None` then a matrix of zeros is used for the mean.
- The dimensions of this matrix are inferred from the shape of `rowcov` and
- `colcov`, if these are provided, or set to ``1`` if ambiguous.
- `rowcov` and `colcov` can be two-dimensional array_likes specifying the
- covariance matrices directly. Alternatively, a one-dimensional array will
- be be interpreted as the entries of a diagonal matrix, and a scalar or
- zero-dimensional array will be interpreted as this value times the
- identity matrix.
- """
- _matnorm_doc_frozen_callparams = ""
- _matnorm_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- matnorm_docdict_params = {
- '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,
- '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- matnorm_docdict_noparams = {
- '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,
- '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class matrix_normal_gen(multi_rv_generic):
- r"""A matrix normal random variable.
- The `mean` keyword specifies the mean. The `rowcov` keyword specifies the
- among-row covariance matrix. The 'colcov' keyword specifies the
- among-column covariance matrix.
- Methods
- -------
- pdf(X, mean=None, rowcov=1, colcov=1)
- Probability density function.
- logpdf(X, mean=None, rowcov=1, colcov=1)
- Log of the probability density function.
- rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)
- Draw random samples.
- entropy(rowcol=1, colcov=1)
- Differential entropy.
- Parameters
- ----------
- %(_matnorm_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_matnorm_doc_callparams_note)s
- The covariance matrices specified by `rowcov` and `colcov` must be
- (symmetric) positive definite. If the samples in `X` are
- :math:`m \times n`, then `rowcov` must be :math:`m \times m` and
- `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.
- The probability density function for `matrix_normal` is
- .. math::
- f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}
- \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}
- (X-M)^T \right] \right),
- where :math:`M` is the mean, :math:`U` the among-row covariance matrix,
- :math:`V` the among-column covariance matrix.
- The `allow_singular` behaviour of the `multivariate_normal`
- distribution is not currently supported. Covariance matrices must be
- full rank.
- The `matrix_normal` distribution is closely related to the
- `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`
- (the vector formed by concatenating the columns of :math:`X`) has a
- multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`
- and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker
- product). Sampling and pdf evaluation are
- :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but
- :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,
- making this equivalent form algorithmically inefficient.
- .. versionadded:: 0.17.0
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import matrix_normal
- >>> M = np.arange(6).reshape(3,2); M
- array([[0, 1],
- [2, 3],
- [4, 5]])
- >>> U = np.diag([1,2,3]); U
- array([[1, 0, 0],
- [0, 2, 0],
- [0, 0, 3]])
- >>> V = 0.3*np.identity(2); V
- array([[ 0.3, 0. ],
- [ 0. , 0.3]])
- >>> X = M + 0.1; X
- array([[ 0.1, 1.1],
- [ 2.1, 3.1],
- [ 4.1, 5.1]])
- >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
- 0.023410202050005054
- >>> # Equivalent multivariate normal
- >>> from scipy.stats import multivariate_normal
- >>> vectorised_X = X.T.flatten()
- >>> equiv_mean = M.T.flatten()
- >>> equiv_cov = np.kron(V,U)
- >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)
- 0.023410202050005054
- Alternatively, the object may be called (as a function) to fix the mean
- and covariance parameters, returning a "frozen" matrix normal
- random variable:
- >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1)
- >>> # Frozen object with the same methods but holding the given
- >>> # mean and covariance fixed.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)
- def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):
- """Create a frozen matrix normal distribution.
- See `matrix_normal_frozen` for more information.
- """
- return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)
- def _process_parameters(self, mean, rowcov, colcov):
- """
- Infer dimensionality from mean or covariance matrices. Handle
- defaults. Ensure compatible dimensions.
- """
- # Process mean
- if mean is not None:
- mean = np.asarray(mean, dtype=float)
- meanshape = mean.shape
- if len(meanshape) != 2:
- raise ValueError("Array `mean` must be two dimensional.")
- if np.any(meanshape == 0):
- raise ValueError("Array `mean` has invalid shape.")
- # Process among-row covariance
- rowcov = np.asarray(rowcov, dtype=float)
- if rowcov.ndim == 0:
- if mean is not None:
- rowcov = rowcov * np.identity(meanshape[0])
- else:
- rowcov = rowcov * np.identity(1)
- elif rowcov.ndim == 1:
- rowcov = np.diag(rowcov)
- rowshape = rowcov.shape
- if len(rowshape) != 2:
- raise ValueError("`rowcov` must be a scalar or a 2D array.")
- if rowshape[0] != rowshape[1]:
- raise ValueError("Array `rowcov` must be square.")
- if rowshape[0] == 0:
- raise ValueError("Array `rowcov` has invalid shape.")
- numrows = rowshape[0]
- # Process among-column covariance
- colcov = np.asarray(colcov, dtype=float)
- if colcov.ndim == 0:
- if mean is not None:
- colcov = colcov * np.identity(meanshape[1])
- else:
- colcov = colcov * np.identity(1)
- elif colcov.ndim == 1:
- colcov = np.diag(colcov)
- colshape = colcov.shape
- if len(colshape) != 2:
- raise ValueError("`colcov` must be a scalar or a 2D array.")
- if colshape[0] != colshape[1]:
- raise ValueError("Array `colcov` must be square.")
- if colshape[0] == 0:
- raise ValueError("Array `colcov` has invalid shape.")
- numcols = colshape[0]
- # Ensure mean and covariances compatible
- if mean is not None:
- if meanshape[0] != numrows:
- raise ValueError("Arrays `mean` and `rowcov` must have the "
- "same number of rows.")
- if meanshape[1] != numcols:
- raise ValueError("Arrays `mean` and `colcov` must have the "
- "same number of columns.")
- else:
- mean = np.zeros((numrows, numcols))
- dims = (numrows, numcols)
- return dims, mean, rowcov, colcov
- def _process_quantiles(self, X, dims):
- """
- Adjust quantiles array so that last two axes labels the components of
- each data point.
- """
- X = np.asarray(X, dtype=float)
- if X.ndim == 2:
- X = X[np.newaxis, :]
- if X.shape[-2:] != dims:
- raise ValueError("The shape of array `X` is not compatible "
- "with the distribution parameters.")
- return X
- def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,
- col_prec_rt, log_det_colcov):
- """Log of the matrix normal probability density function.
- Parameters
- ----------
- dims : tuple
- Dimensions of the matrix variates
- X : ndarray
- Points at which to evaluate the log of the probability
- density function
- mean : ndarray
- Mean of the distribution
- row_prec_rt : ndarray
- A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)
- is the inverse of the among-row covariance matrix
- log_det_rowcov : float
- Logarithm of the determinant of the among-row covariance matrix
- col_prec_rt : ndarray
- A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)
- is the inverse of the among-column covariance matrix
- log_det_colcov : float
- Logarithm of the determinant of the among-column covariance matrix
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- numrows, numcols = dims
- roll_dev = np.moveaxis(X-mean, -1, 0)
- scale_dev = np.tensordot(col_prec_rt.T,
- np.dot(roll_dev, row_prec_rt), 1)
- maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)
- return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov
- + numrows*log_det_colcov + maha)
- def logpdf(self, X, mean=None, rowcov=1, colcov=1):
- """Log of the matrix normal probability density function.
- Parameters
- ----------
- X : array_like
- Quantiles, with the last two axes of `X` denoting the components.
- %(_matnorm_doc_default_callparams)s
- Returns
- -------
- logpdf : ndarray
- Log of the probability density function evaluated at `X`
- Notes
- -----
- %(_matnorm_doc_callparams_note)s
- """
- dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
- colcov)
- X = self._process_quantiles(X, dims)
- rowpsd = _PSD(rowcov, allow_singular=False)
- colpsd = _PSD(colcov, allow_singular=False)
- out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,
- colpsd.log_pdet)
- return _squeeze_output(out)
- def pdf(self, X, mean=None, rowcov=1, colcov=1):
- """Matrix normal probability density function.
- Parameters
- ----------
- X : array_like
- Quantiles, with the last two axes of `X` denoting the components.
- %(_matnorm_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Probability density function evaluated at `X`
- Notes
- -----
- %(_matnorm_doc_callparams_note)s
- """
- return np.exp(self.logpdf(X, mean, rowcov, colcov))
- def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):
- """Draw random samples from a matrix normal distribution.
- Parameters
- ----------
- %(_matnorm_doc_default_callparams)s
- size : integer, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `dims`), where `dims` is the
- dimension of the random matrices.
- Notes
- -----
- %(_matnorm_doc_callparams_note)s
- """
- size = int(size)
- dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
- colcov)
- rowchol = scipy.linalg.cholesky(rowcov, lower=True)
- colchol = scipy.linalg.cholesky(colcov, lower=True)
- random_state = self._get_random_state(random_state)
- # We aren't generating standard normal variates with size=(size,
- # dims[0], dims[1]) directly to ensure random variates remain backwards
- # compatible. See https://github.com/scipy/scipy/pull/12312 for more
- # details.
- std_norm = random_state.standard_normal(
- size=(dims[1], size, dims[0])
- ).transpose(1, 2, 0)
- out = mean + np.einsum('jp,ipq,kq->ijk',
- rowchol, std_norm, colchol,
- optimize=True)
- if size == 1:
- out = out.reshape(mean.shape)
- return out
- def entropy(self, rowcov=1, colcov=1):
- """Log of the matrix normal probability density function.
- Parameters
- ----------
- rowcov : array_like, optional
- Among-row covariance matrix of the distribution (default: ``1``)
- colcov : array_like, optional
- Among-column covariance matrix of the distribution (default: ``1``)
- Returns
- -------
- entropy : float
- Entropy of the distribution
- Notes
- -----
- %(_matnorm_doc_callparams_note)s
- """
- dummy_mean = np.zeros((rowcov.shape[0], colcov.shape[0]))
- dims, _, rowcov, colcov = self._process_parameters(dummy_mean,
- rowcov,
- colcov)
- rowpsd = _PSD(rowcov, allow_singular=False)
- colpsd = _PSD(colcov, allow_singular=False)
- return self._entropy(dims, rowpsd.log_pdet, colpsd.log_pdet)
- def _entropy(self, dims, row_cov_logdet, col_cov_logdet):
- n, p = dims
- return (0.5 * n * p * (1 + _LOG_2PI) + 0.5 * p * row_cov_logdet +
- 0.5 * n * col_cov_logdet)
- matrix_normal = matrix_normal_gen()
- class matrix_normal_frozen(multi_rv_frozen):
- """
- Create a frozen matrix normal distribution.
- Parameters
- ----------
- %(_matnorm_doc_default_callparams)s
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is `None` the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import matrix_normal
- >>> distn = matrix_normal(mean=np.zeros((3,3)))
- >>> X = distn.rvs(); X
- array([[-0.02976962, 0.93339138, -0.09663178],
- [ 0.67405524, 0.28250467, -0.93308929],
- [-0.31144782, 0.74535536, 1.30412916]])
- >>> distn.pdf(X)
- 2.5160642368346784e-05
- >>> distn.logpdf(X)
- -10.590229595124615
- """
- __class_getitem__ = None
- def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):
- self._dist = matrix_normal_gen(seed)
- self.dims, self.mean, self.rowcov, self.colcov = \
- self._dist._process_parameters(mean, rowcov, colcov)
- self.rowpsd = _PSD(self.rowcov, allow_singular=False)
- self.colpsd = _PSD(self.colcov, allow_singular=False)
- def logpdf(self, X):
- X = self._dist._process_quantiles(X, self.dims)
- out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,
- self.rowpsd.log_pdet, self.colpsd.U,
- self.colpsd.log_pdet)
- return _squeeze_output(out)
- def pdf(self, X):
- return np.exp(self.logpdf(X))
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,
- random_state)
- def entropy(self):
- return self._dist._entropy(self.dims, self.rowpsd.log_pdet,
- self.colpsd.log_pdet)
- # Set frozen generator docstrings from corresponding docstrings in
- # matrix_normal_gen and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'rvs', 'entropy']:
- method = matrix_normal_gen.__dict__[name]
- method_frozen = matrix_normal_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(method.__doc__,
- matnorm_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)
- _matt_doc_default_callparams = """\
- mean : array_like, optional
- Mean of the distribution (default: `None`)
- row_spread : array_like, optional
- Row-wise 2nd order raw central moment matrix (default: ``1``)
- col_spread : array_like, optional
- Column-wise 2nd order raw central moment matrix (default: ``1``)
- df : scalar, optional
- Degrees of freedom (default: ``1``)
- """
- _matt_doc_callparams_note = """\
- If `mean` is set to `None` then a matrix of zeros is used for the mean.
- The dimensions of this matrix are inferred from the shape of `row_spread` and
- `col_spread`, if these are provided, or set to ``1`` if ambiguous.
- `row_spread` and `col_spread` can be two-dimensional array_likes specifying the
- spread matrices directly. Alternatively, a one-dimensional array will
- be be interpreted as the entries of a diagonal matrix, and a scalar or
- zero-dimensional array will be interpreted as this value times the
- identity matrix.
- """
- _matt_doc_frozen_callparams = ""
- _matt_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- matrix_t_docdict_params = {
- "_matt_doc_default_callparams": _matt_doc_default_callparams,
- "_matt_doc_callparams_note": _matt_doc_callparams_note,
- "_doc_random_state": _doc_random_state,
- }
- matrix_t_docdict_noparams = {
- "_matt_doc_default_callparams": _matt_doc_frozen_callparams,
- "_matt_doc_callparams_note": _matt_doc_frozen_callparams_note,
- "_doc_random_state": _doc_random_state,
- }
- class matrix_t_gen(multi_rv_generic):
- r"""A matrix t-random variable.
- The `mean` keyword specifies the mean.
- The `row_spread` keyword specifies the row-wise spread matrix.
- The `col_spread` keyword specifies the column-wise spread matrix.
- Methods
- -------
- pdf(x, mean=None, row_spread=None, col_spread=None)
- Probability density function.
- logpdf(x, mean=None, row_spread=None, col_spread=None)
- Log of the probability density function.
- rvs(mean=None, row_spread=1, col_spread=1, df=1, size=1, random_state=None)
- Draw random samples.
- Parameters
- ----------
- %(_matt_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_matt_doc_callparams_note)s
- The spread matrices specified by `row_spread` and `col_spread` must be
- (symmetric) positive definite. If the samples in `X` have shape `(m,n)`
- then `row_spread` must have shape `(m,m)` and `col_spread` must have shape `(n,n)`.
- Spread matrices must be full rank.
- The probability density function for `matrix_t` is
- .. math::
- f(X \vert \mathrm{M}, \Sigma, \Omega, \mathrm{df}) =
- \frac{
- \Gamma_n \left(
- \frac{\mathrm{df} + m + n - 1}{2}
- \right)
- \left(
- \det \left(
- I_n + (X - \mathrm{M})^T \Sigma^{-1} (X - \mathrm{M}) \Omega^{-1}
- \right)
- \right)^{ -\frac{\mathrm{df} + m + n - 1}{2} }
- }{
- \Gamma_n \left(
- \frac{\mathrm{df} + n - 1}{2}
- \right)
- \pi^{mn / 2}
- \left( \det \Sigma \right)^{n/2}
- \left( \det \Omega \right)^{m/2}
- }
- or, alternatively,
- .. math::
- f(X \vert \mathrm{M}, \Sigma, \Omega, \mathrm{df}) =
- \frac{
- \Gamma_m \left(
- \frac{\mathrm{df} + m + n - 1}{2}
- \right)
- \left(
- \det \left(
- I_m + \Sigma^{-1} (X - \mathrm{M}) \Omega^{-1} (X - \mathrm{M})^T
- \right)
- \right)^{ -\frac{\mathrm{df} + m + n - 1}{2} }
- }{
- \Gamma_m \left(
- \frac{\mathrm{df} + n - 1}{2}
- \right)
- \pi^{mn / 2}
- \left( \det \Sigma \right)^{n/2}
- \left( \det \Omega \right)^{m/2}
- }
- where :math:`\mathrm{M}` is the mean,
- :math:`\Sigma` is the row-wise spread matrix,
- :math:`\Omega` is the column-wise matrix,
- :math:`\mathrm{df}` is the degrees of freedom,
- and :math:`\Gamma_n` is the multivariate gamma function.
- These equivalent formulations come from the identity
- .. math::
- \det\left( I_m + A B \right) = \det\left( I_n + B A \right)
- for :math:`m \times n` arrays :math:`A` and :math:`B^T`
- and the fact that
- :math:`\gamma_n(\mathrm{df} + m) / \gamma_n(\mathrm{df})`
- is equal to
- :math:`\gamma_m(\mathrm{df} + n) / \gamma_m(\mathrm{df})`,
- where
- .. math::
- \gamma_m(\mathrm{df}) = 2^{m(m-1)/2}
- \Gamma_m\left( (\mathrm{df} + m - 1) / 2 \right)
- denotes a normalized multivariate gamma function.
- When :math:`\mathrm{df} = 1` this distribution is known as the matrix
- variate Cauchy.
- .. versionadded:: 1.17.0
- References
- ----------
- .. [1] Gupta, A.K., & Nagar, D.K. (2000). Matrix Variate Distributions (1st ed.).
- Chapman and Hall/CRC.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import matrix_t
- >>> M = np.arange(6).reshape(3,2)
- >>> M
- array([[0, 1],
- [2, 3],
- [4, 5]])
- >>> Sigma = np.diag([1,2,3])
- >>> Sigma
- array([[1, 0, 0],
- [0, 2, 0],
- [0, 0, 3]])
- >>> Omega = 0.3*np.identity(2)
- >>> Omega
- array([[ 0.3, 0. ],
- [ 0. , 0.3]])
- >>> X = M + 0.1
- >>> X
- array([[ 0.1, 1.1],
- [ 2.1, 3.1],
- [ 4.1, 5.1]])
- >>> df = 3
- >>> matrix_t.pdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
- 0.9972880280135796
- Alternatively, the object may be called (as a function) to fix the mean
- and spread parameters, returning a "frozen" matrix t
- random variable:
- >>> rv = matrix_t(mean=None, row_spread=1, col_spread=1, df=1)
- >>> # Frozen object with the same methods but holding the given
- >>> # mean and spreads and degrees of freedom fixed.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = scipy._lib.doccer.docformat(
- self.__doc__, matrix_t_docdict_params
- )
- def __call__(self, mean=None, row_spread=1, col_spread=1, df=None, seed=None):
- """Create a frozen matrix t distribution.
- See `matrix_t_frozen` for more information.
- """
- return matrix_t_frozen(mean, row_spread, col_spread, df, seed)
- def _process_parameters(self, mean, row_spread, col_spread, df):
- """
- Infer dimensionality from mean or covariance matrices.
- Handle defaults. Ensure conformality.
- Parameters
- ----------
- mean : ndarray, shape (m,n)
- Mean of the distribution
- row_spread : ndarray, shape (m,m)
- Row-wise spread matrix
- col_spread : ndarray, shape (n,n)
- Column-wise spread matrix
- df : float
- Degrees of freedom
- """
- # Process mean
- if mean is not None:
- mean = np.asarray(mean, dtype=float)
- meanshape = mean.shape
- if 0 in meanshape:
- raise ValueError("Array `mean` has invalid shape.")
- if len(meanshape) != 2:
- raise ValueError("Array `mean` must be 2D.")
- # Process row-wise spread
- row_spread = np.asarray(row_spread, dtype=float)
- if row_spread.ndim == 0:
- if mean is not None:
- row_spread = row_spread * np.identity(meanshape[0])
- else:
- row_spread = row_spread * np.identity(1)
- elif row_spread.ndim == 1:
- row_spread = np.diag(row_spread)
- rowshape = row_spread.shape
- if 0 in rowshape:
- raise ValueError("Array `row_spread` has invalid shape.")
- if len(rowshape) != 2:
- raise ValueError("Array `row_spread` must be a scalar or a 2D array.")
- if rowshape[0] != rowshape[1]:
- raise ValueError("Array `row_spread` must be square.")
- numrows = rowshape[0]
- # Process column-wise spread
- col_spread = np.asarray(col_spread, dtype=float)
- if col_spread.ndim == 0:
- if mean is not None:
- col_spread = col_spread * np.identity(meanshape[1])
- else:
- col_spread = col_spread * np.identity(1)
- elif col_spread.ndim == 1:
- col_spread = np.diag(col_spread)
- colshape = col_spread.shape
- if 0 in colshape:
- raise ValueError("Array `col_spread` has invalid shape.")
- if len(colshape) != 2:
- raise ValueError("Array `col_spread` must be a scalar or a 2D array.")
- if colshape[0] != colshape[1]:
- raise ValueError("Array `col_spread` must be square.")
- numcols = colshape[0]
- # Ensure mean and spreads are conformal
- if mean is not None:
- if meanshape[0] != numrows:
- raise ValueError(
- "Arrays `mean` and `row_spread` must have the same number of rows."
- )
- if meanshape[1] != numcols:
- raise ValueError(
- "Arrays `mean` and `col_spread` must have the same number "
- "of columns."
- )
- else:
- mean = np.zeros((numrows, numcols))
- dims = (numrows, numcols)
- if df is None:
- df = 1 # default to matrix variate Cauchy
- elif not np.isscalar(df):
- raise ValueError("Degrees of freedom must be a scalar.")
- elif df <= 0:
- raise ValueError("Degrees of freedom must be positive.")
- return dims, mean, row_spread, col_spread, df
- def _process_quantiles(self, X, dims):
- """
- Adjust quantiles array so that last two axes labels the component of
- each data point.
- """
- X = np.asarray(X, dtype=float)
- if X.ndim == 2:
- X = X[np.newaxis, :]
- if X.shape[-2:] != dims:
- raise ValueError(
- "The shape of array `X` is not conformal with "
- "the distribution parameters."
- )
- return X
- def _logpdf(
- self,
- dims,
- X,
- mean,
- df,
- invrow_spread,
- invcol_spread,
- logdetrow_spread,
- logdetcol_spread,
- ):
- """
- Log of the matrix t probability density function.
- Parameters
- ----------
- dims : tuple
- Dimensions of the matrix variates
- X : ndarray, shape (m,n) (equal to `dims`)
- Points at which to evaluate the log of the probability density function
- mean : ndarray, shape (m,n)
- Mean of the distribution
- df : float
- Degrees-of-freedom parameter
- invrow_spread : ndarray, shape (m,m)
- Inverse of the row-wise spread matrix
- invcol_spread : ndarray, shape (n,n)
- Inverse of the column-wise spread matrix
- logdetrow_spread : float
- Log-determinant of the row-wise spread matrix
- detcol_spread : float
- Log-determinant of the column-wise spread matrix
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use `logpdf` instead.
- """
- m, n = dims
- X_shape = X.shape
- if X.ndim > 3:
- X = X.reshape(-1, m, n)
- X_centered = X - mean[np.newaxis, ...]
- det_arg = np.identity(n) + np.einsum(
- "nij,njk,nkl,nlp->nip",
- X_centered.transpose(0, 2, 1),
- invrow_spread[np.newaxis, ...],
- X_centered,
- invcol_spread[np.newaxis, ...],
- optimize=True,
- )
- _, logdet = np.linalg.slogdet(det_arg)
- log_d_mn = -((df + m + n - 1) / 2) * logdet
- log_f_mn = (
- scipy.special.multigammaln((df + m + n - 1) / 2, n)
- - scipy.special.multigammaln((df + n - 1) / 2, n)
- - (m * n / 2) * _LOG_PI
- - (n / 2) * logdetrow_spread
- - (m / 2) * logdetcol_spread
- )
- retval = log_d_mn + log_f_mn
- if len(X_shape) > 3:
- retval = retval.reshape(X_shape[:-2])
- return retval
- def logpdf(self, X, mean=None, row_spread=1, col_spread=1, df=1):
- """Log of the matrix normal probability density function.
- Parameters
- ----------
- X : array_like
- Quantiles, with the last two axes of `X` denoting the components.
- %(_matt_doc_default_callparams)s
- Returns
- -------
- logpdf : ndarray
- Log of the probability density function evaluated at `X`
- Notes
- -----
- %(_matt_doc_callparams_note)s
- Examples
- -------
- >>> import numpy as np
- >>> from scipy.stats import matrix_t
- >>> M = np.arange(6).reshape(3,2); M
- array([[0, 1],
- [2, 3],
- [4, 5]])
- >>> Sigma = np.diag([1,2,3]); Sigma
- array([[1, 0, 0],
- [0, 2, 0],
- [0, 0, 3]])
- >>> Omega = 0.3*np.identity(2); Omega
- array([[ 0.3, 0. ],
- [ 0. , 0.3]])
- >>> X = M + 0.1; X
- array([[ 0.1, 1.1],
- [ 2.1, 3.1],
- [ 4.1, 5.1]])
- >>> df = 3; df
- 3
- >>> matrix_t.logpdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
- -0.002715656044664061
- """
- dims, mean, row_spread, col_spread, df = self._process_parameters(
- mean, row_spread, col_spread, df
- )
- X = self._process_quantiles(X, dims)
- rowpsd = _PSD(row_spread, allow_singular=False)
- colpsd = _PSD(col_spread, allow_singular=False)
- invrow_spread = rowpsd.pinv
- invcol_spread = colpsd.pinv
- logdetrow_spread = rowpsd.log_pdet
- logdetcol_spread = colpsd.log_pdet
- out = self._logpdf(
- dims,
- X,
- mean,
- df,
- invrow_spread,
- invcol_spread,
- logdetrow_spread,
- logdetcol_spread,
- )
- return _squeeze_output(out)
- def pdf(self, X, mean=None, row_spread=1, col_spread=1, df=1):
- """Matrix t probability density function.
- Parameters
- ----------
- X : array_like
- Quantiles, with the last two axes of `X` denoting the components.
- %(_matt_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Probability density function evaluated at `X`
- Notes
- -----
- %(_matt_doc_callparams_note)s
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import matrix_t
- >>> M = np.arange(6).reshape(3,2); M
- array([[0, 1],
- [2, 3],
- [4, 5]])
- >>> Sigma = np.diag([1,2,3]); Sigma
- array([[1, 0, 0],
- [0, 2, 0],
- [0, 0, 3]])
- >>> Omega = 0.3*np.identity(2); Omega
- array([[ 0.3, 0. ],
- [ 0. , 0.3]])
- >>> X = M + 0.1; X
- array([[ 0.1, 1.1],
- [ 2.1, 3.1],
- [ 4.1, 5.1]])
- >>> df = 3; df
- 3
- >>> matrix_t.logpdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
- 0.9972880280135796
- """
- return np.exp(self.logpdf(X, mean, row_spread, col_spread, df))
- def rvs(
- self, mean=None, row_spread=1, col_spread=1, df=1, size=1, random_state=None
- ) -> np.ndarray:
- """Draw random samples from a matrix t distribution.
- Parameters
- ----------
- %(_matt_doc_default_callparams)s
- size : integer, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `dims`), where `dims` is the
- dimension of the random matrices.
- Notes
- -----
- %(_matt_doc_callparams_note)s
- This method takes advantage of the two equivalent expressions of the
- probability density function. It samples a Cholesky factor of a
- random variate of the appropriate inverse Wishart distribution using
- the smaller of the row/column dimensions.
- """
- size = int(size)
- dims, mean, row_spread, col_spread, df = self._process_parameters(
- mean, row_spread, col_spread, df
- )
- random_state = self._get_random_state(random_state)
- # see scipy.stats.matrix_normal.rvs
- std_norm = random_state.standard_normal(
- size=(dims[1], size, dims[0])
- ).transpose(1, 2, 0)
- if dims[0] <= dims[1]:
- rowchol = _cholesky_invwishart_rvs(df, row_spread, size, random_state)
- colchol = scipy.linalg.cholesky(col_spread, lower=True)[np.newaxis, ...]
- else:
- rowchol = scipy.linalg.cholesky(row_spread, lower=True)[np.newaxis, ...]
- colchol = _cholesky_invwishart_rvs(df, col_spread, size, random_state)
- t_raw = np.einsum("ijp,ipq,ikq->ijk", rowchol, std_norm, colchol, optimize=True)
- t_centered = mean[np.newaxis, ...] + t_raw
- if size == 1:
- t_centered = t_centered.reshape(mean.shape)
- return t_centered
- matrix_t = matrix_t_gen()
- class matrix_t_frozen:
- def __init__(self, mean, row_spread, col_spread, df, seed=None):
- self._dist = matrix_t_gen(seed)
- self.dims, self.mean, self.row_spread, self.col_spread, self.df = (
- self._dist._process_parameters(mean, row_spread, col_spread, df)
- )
- self._random_state = np.random.RandomState(seed)
- self.rowpsd = _PSD(self.row_spread, allow_singular=False)
- self.colpsd = _PSD(self.col_spread, allow_singular=False)
- def logpdf(self, X):
- X = self._dist._process_quantiles(X, self.dims)
- rowpsd = _PSD(self.row_spread, allow_singular=False)
- colpsd = _PSD(self.col_spread, allow_singular=False)
- invrow_spread = rowpsd.pinv
- invcol_spread = colpsd.pinv
- logdetrow_spread = rowpsd.log_pdet
- logdetcol_spread = colpsd.log_pdet
- out = self._dist._logpdf(
- self.dims,
- X,
- self.mean,
- self.df,
- invrow_spread,
- invcol_spread,
- logdetrow_spread,
- logdetcol_spread,
- )
- return _squeeze_output(out)
- def pdf(self, X):
- return np.exp(self.logpdf(X))
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(
- self.mean, self.row_spread, self.col_spread, self.df, size, random_state
- )
- # Set frozen generator docstrings from corresponding docstrings in
- # matrix_t_gen and fill in default strings in class docstrings
- for name in ["logpdf", "pdf", "rvs"]:
- method = matrix_t_gen.__dict__[name]
- method_frozen = matrix_t_frozen.__dict__[name]
- method_frozen.__doc__ = scipy._lib.doccer.docformat(
- method.__doc__, matrix_t_docdict_noparams
- )
- method.__doc__ = scipy._lib.doccer.docformat(
- method.__doc__, matrix_t_docdict_params
- )
- def _cholesky_invwishart_rvs(
- df: float, scale: np.ndarray, size: int, random_state: np.random.Generator
- ) -> np.ndarray:
- r"""Samples the lower Cholesky factor of a matrix following an inverse
- Wishart distribution.
- Notes
- -----
- Intended to be used *as a step in the process* for computing random variates
- of a matrix t distribution :math:`\mathcal{T}_{m,n}` by appealing to its
- alternative form as a matrix mixture
- .. math::
- \mathcal{T}_{m,n}( \mathrm{df}, \mathrm{M}, \Sigma, \Omega )
- = \mathcal{N}_{m,n}(
- \mathrm{M},
- \mathcal{W}^{-1}_m(\mathrm{df} + m - 1, \Sigma),
- \Omega
- )
- = \mathcal{N}_{m,n}(
- \mathrm{M},
- \Sigma,
- \mathcal{W}^{-1}_n(\mathrm{df} - n + 1, \Omega)
- )
- where :math:`\mathcal{N}_{m,n}` is a matrix normal distribution
- and :math:`\mathcal{W}^{-1}_d` is an inverse Wishart distribution.
- Accordingly, the degrees of freedom adjustment
- :math:`\mathrm{df} \to \mathrm{df} + d - 1`
- occurrs in the scope of this function.
- """
- df_iw = df + scale.shape[0] - 1
- iw_samples = scipy.stats.invwishart.rvs(df_iw, scale, size, random_state)
- if size == 1:
- iw_samples = iw_samples[np.newaxis, ...]
- chol_samples = np.empty_like(iw_samples)
- for idx in range(size):
- chol_samples[idx] = scipy.linalg.cholesky(
- iw_samples[idx], lower=True, check_finite=False
- ).reshape(iw_samples.shape[1:])
- return chol_samples.reshape((size, *scale.shape))
- _dirichlet_doc_default_callparams = """\
- alpha : array_like
- The concentration parameters. The number of entries determines the
- dimensionality of the distribution.
- """
- _dirichlet_doc_frozen_callparams = ""
- _dirichlet_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- dirichlet_docdict_params = {
- '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
- '_doc_random_state': _doc_random_state
- }
- dirichlet_docdict_noparams = {
- '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
- '_doc_random_state': _doc_random_state
- }
- def _dirichlet_check_parameters(alpha):
- alpha = np.asarray(alpha)
- if np.min(alpha) <= 0:
- raise ValueError("All parameters must be greater than 0")
- elif alpha.ndim != 1:
- raise ValueError("Parameter vector 'a' must be one dimensional, "
- f"but a.shape = {alpha.shape}.")
- return alpha
- def _dirichlet_check_input(alpha, x):
- x = np.asarray(x)
- if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
- raise ValueError("Vector 'x' must have either the same number "
- "of entries as, or one entry fewer than, "
- f"parameter vector 'a', but alpha.shape = {alpha.shape} "
- f"and x.shape = {x.shape}.")
- if x.shape[0] != alpha.shape[0]:
- xk = np.array([1 - np.sum(x, 0)])
- if xk.ndim == 1:
- x = np.append(x, xk)
- elif xk.ndim == 2:
- x = np.vstack((x, xk))
- else:
- raise ValueError("The input must be one dimensional or a two "
- "dimensional matrix containing the entries.")
- if np.min(x) < 0:
- raise ValueError("Each entry in 'x' must be greater than or equal "
- "to zero.")
- if np.max(x) > 1:
- raise ValueError("Each entry in 'x' must be smaller or equal one.")
- # Check x_i > 0 or alpha_i > 1
- xeq0 = (x == 0)
- alphalt1 = (alpha < 1)
- if x.shape != alpha.shape:
- alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)
- chk = np.logical_and(xeq0, alphalt1)
- if np.sum(chk):
- raise ValueError("Each entry in 'x' must be greater than zero if its "
- "alpha is less than one.")
- if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
- raise ValueError("The input vector 'x' must lie within the normal "
- f"simplex. but np.sum(x, 0) = {np.sum(x, 0)}.")
- return x
- def _lnB(alpha):
- r"""Internal helper function to compute the log of the useful quotient.
- .. math::
- B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)}
- {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- B : scalar
- Helper quotient, internal use only
- """
- return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
- class dirichlet_gen(multi_rv_generic):
- r"""A Dirichlet random variable.
- The ``alpha`` keyword specifies the concentration parameters of the
- distribution.
- .. versionadded:: 0.15.0
- Methods
- -------
- pdf(x, alpha)
- Probability density function.
- logpdf(x, alpha)
- Log of the probability density function.
- rvs(alpha, size=1, random_state=None)
- Draw random samples from a Dirichlet distribution.
- mean(alpha)
- The mean of the Dirichlet distribution
- var(alpha)
- The variance of the Dirichlet distribution
- cov(alpha)
- The covariance of the Dirichlet distribution
- entropy(alpha)
- Compute the differential entropy of the Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- Each :math:`\alpha` entry must be positive. The distribution has only
- support on the simplex defined by
- .. math::
- \sum_{i=1}^{K} x_i = 1
- where :math:`0 < x_i < 1`.
- If the quantiles don't lie within the simplex, a ValueError is raised.
- The probability density function for `dirichlet` is
- .. math::
- f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
- where
- .. math::
- \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}
- {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
- and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the
- concentration parameters and :math:`K` is the dimension of the space
- where :math:`x` takes values.
- Note that the `dirichlet` interface is somewhat inconsistent.
- The array returned by the rvs function is transposed
- with respect to the format expected by the pdf and logpdf.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import dirichlet
- Generate a dirichlet random variable
- >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles
- >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters
- >>> dirichlet.pdf(quantiles, alpha)
- 0.2843831684937255
- The same PDF but following a log scale
- >>> dirichlet.logpdf(quantiles, alpha)
- -1.2574327653159187
- Once we specify the dirichlet distribution
- we can then calculate quantities of interest
- >>> dirichlet.mean(alpha) # get the mean of the distribution
- array([0.01960784, 0.24509804, 0.73529412])
- >>> dirichlet.var(alpha) # get variance
- array([0.00089829, 0.00864603, 0.00909517])
- >>> dirichlet.entropy(alpha) # calculate the differential entropy
- -4.3280162474082715
- We can also return random samples from the distribution
- >>> dirichlet.rvs(alpha, size=1, random_state=1)
- array([[0.00766178, 0.24670518, 0.74563305]])
- >>> dirichlet.rvs(alpha, size=2, random_state=2)
- array([[0.01639427, 0.1292273 , 0.85437844],
- [0.00156917, 0.19033695, 0.80809388]])
- Alternatively, the object may be called (as a function) to fix
- concentration parameters, returning a "frozen" Dirichlet
- random variable:
- >>> rv = dirichlet(alpha)
- >>> # Frozen object with the same methods but holding the given
- >>> # concentration parameters fixed.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
- def __call__(self, alpha, seed=None):
- return dirichlet_frozen(alpha, seed=seed)
- def _logpdf(self, x, alpha):
- """Log of the Dirichlet probability density function.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the log of the probability
- density function
- %(_dirichlet_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- lnB = _lnB(alpha)
- return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)
- def logpdf(self, x, alpha):
- """Log of the Dirichlet probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray or scalar
- Log of the probability density function evaluated at `x`.
- """
- alpha = _dirichlet_check_parameters(alpha)
- x = _dirichlet_check_input(alpha, x)
- out = self._logpdf(x, alpha)
- return _squeeze_output(out)
- def pdf(self, x, alpha):
- """The Dirichlet probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray or scalar
- The probability density function evaluated at `x`.
- """
- alpha = _dirichlet_check_parameters(alpha)
- x = _dirichlet_check_input(alpha, x)
- out = np.exp(self._logpdf(x, alpha))
- return _squeeze_output(out)
- def mean(self, alpha):
- """Mean of the Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- mu : ndarray or scalar
- Mean of the Dirichlet distribution.
- """
- alpha = _dirichlet_check_parameters(alpha)
- out = alpha / (np.sum(alpha))
- return _squeeze_output(out)
- def var(self, alpha):
- """Variance of the Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- v : ndarray or scalar
- Variance of the Dirichlet distribution.
- """
- alpha = _dirichlet_check_parameters(alpha)
- alpha0 = np.sum(alpha)
- out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
- return _squeeze_output(out)
- def cov(self, alpha):
- """Covariance matrix of the Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- cov : ndarray
- The covariance matrix of the distribution.
- """
- alpha = _dirichlet_check_parameters(alpha)
- alpha0 = np.sum(alpha)
- a = alpha / alpha0
- cov = (np.diag(a) - np.outer(a, a)) / (alpha0 + 1)
- return _squeeze_output(cov)
- def entropy(self, alpha):
- """
- Differential entropy of the Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- Returns
- -------
- h : scalar
- Entropy of the Dirichlet distribution
- """
- alpha = _dirichlet_check_parameters(alpha)
- alpha0 = np.sum(alpha)
- lnB = _lnB(alpha)
- K = alpha.shape[0]
- out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
- (alpha - 1) * scipy.special.psi(alpha))
- return _squeeze_output(out)
- def rvs(self, alpha, size=1, random_state=None):
- """
- Draw random samples from a Dirichlet distribution.
- Parameters
- ----------
- %(_dirichlet_doc_default_callparams)s
- size : int, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `N`), where `N` is the
- dimension of the random variable.
- """
- alpha = _dirichlet_check_parameters(alpha)
- random_state = self._get_random_state(random_state)
- return random_state.dirichlet(alpha, size=size)
- dirichlet = dirichlet_gen()
- class dirichlet_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, alpha, seed=None):
- self.alpha = _dirichlet_check_parameters(alpha)
- self._dist = dirichlet_gen(seed)
- def logpdf(self, x):
- return self._dist.logpdf(x, self.alpha)
- def pdf(self, x):
- return self._dist.pdf(x, self.alpha)
- def mean(self):
- return self._dist.mean(self.alpha)
- def var(self):
- return self._dist.var(self.alpha)
- def cov(self):
- return self._dist.cov(self.alpha)
- def entropy(self):
- return self._dist.entropy(self.alpha)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.alpha, size, random_state)
- # Set frozen generator docstrings from corresponding docstrings in
- # multivariate_normal_gen and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'cov', 'entropy']:
- method = dirichlet_gen.__dict__[name]
- method_frozen = dirichlet_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, dirichlet_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
- _wishart_doc_default_callparams = """\
- df : int
- Degrees of freedom, must be greater than or equal to dimension of the
- scale matrix
- scale : array_like
- Symmetric positive definite scale matrix of the distribution
- """
- _wishart_doc_callparams_note = ""
- _wishart_doc_frozen_callparams = ""
- _wishart_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- wishart_docdict_params = {
- '_doc_default_callparams': _wishart_doc_default_callparams,
- '_doc_callparams_note': _wishart_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- wishart_docdict_noparams = {
- '_doc_default_callparams': _wishart_doc_frozen_callparams,
- '_doc_callparams_note': _wishart_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class wishart_gen(multi_rv_generic):
- r"""A Wishart random variable.
- The `df` keyword specifies the degrees of freedom. The `scale` keyword
- specifies the scale matrix, which must be symmetric and positive definite.
- In this context, the scale matrix is often interpreted in terms of a
- multivariate normal precision matrix (the inverse of the covariance
- matrix). These arguments must satisfy the relationship
- ``df > scale.ndim - 1``, but see notes on using the `rvs` method with
- ``df < scale.ndim``.
- Methods
- -------
- pdf(x, df, scale)
- Probability density function.
- logpdf(x, df, scale)
- Log of the probability density function.
- rvs(df, scale, size=1, random_state=None)
- Draw random samples from a Wishart distribution.
- entropy()
- Compute the differential entropy of the Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- %(_doc_random_state)s
- Raises
- ------
- scipy.linalg.LinAlgError
- If the scale matrix `scale` is not positive definite.
- See Also
- --------
- invwishart, chi2
- Notes
- -----
- %(_doc_callparams_note)s
- The scale matrix `scale` must be a symmetric positive definite
- matrix. Singular matrices, including the symmetric positive semi-definite
- case, are not supported. Symmetry is not checked; only the lower triangular
- portion is used.
- The Wishart distribution is often denoted
- .. math::
- W_p(\nu, \Sigma)
- where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the
- :math:`p \times p` scale matrix.
- The probability density function for `wishart` has support over positive
- definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then
- its PDF is given by:
- .. math::
- f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }
- |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}
- \exp\left( -tr(\Sigma^{-1} S) / 2 \right)
- If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then
- :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).
- If the scale matrix is 1-dimensional and equal to one, then the Wishart
- distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`
- distribution.
- The algorithm [2]_ implemented by the `rvs` method may
- produce numerically singular matrices with :math:`p - 1 < \nu < p`; the
- user may wish to check for this condition and generate replacement samples
- as necessary.
- .. versionadded:: 0.16.0
- References
- ----------
- .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
- Wiley, 1983.
- .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate
- Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.
- Examples
- --------
- >>> import numpy as np
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import wishart, chi2
- >>> x = np.linspace(1e-5, 8, 100)
- >>> w = wishart.pdf(x, df=3, scale=1); w[:5]
- array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
- >>> c = chi2.pdf(x, 3); c[:5]
- array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
- >>> plt.plot(x, w)
- >>> plt.show()
- The input quantiles can be any shape of array, as long as the last
- axis labels the components.
- Alternatively, the object may be called (as a function) to fix the degrees
- of freedom and scale parameters, returning a "frozen" Wishart random
- variable:
- >>> rv = wishart(df=1, scale=1)
- >>> # Frozen object with the same methods but holding the given
- >>> # degrees of freedom and scale fixed.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
- def __call__(self, df=None, scale=None, seed=None):
- """Create a frozen Wishart distribution.
- See `wishart_frozen` for more information.
- """
- return wishart_frozen(df, scale, seed)
- def _process_parameters(self, df, scale):
- if scale is None:
- scale = 1.0
- scale = np.asarray(scale, dtype=float)
- if scale.ndim == 0:
- scale = scale[np.newaxis, np.newaxis]
- elif scale.ndim == 1:
- scale = np.diag(scale)
- elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:
- raise ValueError("Array 'scale' must be square if it is two dimensional,"
- f" but scale.scale = {str(scale.shape)}.")
- elif scale.ndim > 2:
- raise ValueError(f"Array 'scale' must be at most two-dimensional, "
- f"but scale.ndim = {scale.ndim}")
- dim = scale.shape[0]
- if df is None:
- df = dim
- elif not np.isscalar(df):
- raise ValueError("Degrees of freedom must be a scalar.")
- elif df <= dim - 1:
- raise ValueError("Degrees of freedom must be greater than the "
- "dimension of scale matrix minus 1.")
- return dim, df, scale
- def _process_quantiles(self, x, dim):
- """
- Adjust quantiles array so that last axis labels the components of
- each data point.
- """
- x = np.asarray(x, dtype=float)
- if x.ndim == 0:
- x = x * np.eye(dim)[:, :, np.newaxis]
- if x.ndim == 1:
- if dim == 1:
- x = x[np.newaxis, np.newaxis, :]
- else:
- x = np.diag(x)[:, :, np.newaxis]
- elif x.ndim == 2:
- if not x.shape[0] == x.shape[1]:
- raise ValueError(
- "Quantiles must be square if they are two dimensional,"
- f" but x.shape = {str(x.shape)}.")
- x = x[:, :, np.newaxis]
- elif x.ndim == 3:
- if not x.shape[0] == x.shape[1]:
- raise ValueError(
- "Quantiles must be square in the first two dimensions "
- f"if they are three dimensional, but x.shape = {str(x.shape)}.")
- elif x.ndim > 3:
- raise ValueError(f"Quantiles must be at most two-dimensional with an "
- f"additional dimension for multiple components, "
- f"but x.ndim = {x.ndim}")
- # Now we have 3-dim array; should have shape [dim, dim, *]
- if not x.shape[0:2] == (dim, dim):
- raise ValueError('Quantiles have incompatible dimensions: should'
- f' be {(dim, dim)}, got {x.shape[0:2]}.')
- return x
- def _process_size(self, size):
- size = np.asarray(size)
- if size.ndim == 0:
- size = size[np.newaxis]
- elif size.ndim > 1:
- raise ValueError('Size must be an integer or tuple of integers;'
- ' thus must have dimension <= 1.'
- f' Got size.ndim = {str(tuple(size))}')
- n = size.prod()
- shape = tuple(size)
- return n, shape
- def _logpdf(self, x, dim, df, scale, log_det_scale, C):
- """Log of the Wishart probability density function.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the log of the probability
- density function
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- scale : ndarray
- Scale matrix
- log_det_scale : float
- Logarithm of the determinant of the scale matrix
- C : ndarray
- Cholesky factorization of the scale matrix, lower triangular.
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- # log determinant of x
- # Note: x has components along the last axis, so that x.T has
- # components alone the 0-th axis. Then since det(A) = det(A'), this
- # gives us a 1-dim vector of determinants
- # Retrieve tr(scale^{-1} x)
- log_det_x = np.empty(x.shape[-1])
- scale_inv_x = np.empty(x.shape)
- tr_scale_inv_x = np.empty(x.shape[-1])
- for i in range(x.shape[-1]):
- _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
- scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
- tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()
- # Log PDF
- out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
- (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
- multigammaln(0.5*df, dim)))
- return out
- def logpdf(self, x, df, scale):
- """Log of the Wishart probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- Each quantile must be a symmetric positive definite matrix.
- %(_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Log of the probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- dim, df, scale = self._process_parameters(df, scale)
- x = self._process_quantiles(x, dim)
- # Cholesky decomposition of scale, get log(det(scale))
- C, log_det_scale = self._cholesky_logdet(scale)
- out = self._logpdf(x, dim, df, scale, log_det_scale, C)
- return _squeeze_output(out)
- def pdf(self, x, df, scale):
- """Wishart probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- Each quantile must be a symmetric positive definite matrix.
- %(_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- return np.exp(self.logpdf(x, df, scale))
- def _mean(self, dim, df, scale):
- """Mean of the Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'mean' instead.
- """
- return df * scale
- def mean(self, df, scale):
- """Mean of the Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mean : float
- The mean of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._mean(dim, df, scale)
- return _squeeze_output(out)
- def _mode(self, dim, df, scale):
- """Mode of the Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'mode' instead.
- """
- if df >= dim + 1:
- out = (df-dim-1) * scale
- else:
- out = None
- return out
- def mode(self, df, scale):
- """Mode of the Wishart distribution
- Only valid if the degrees of freedom are greater than the dimension of
- the scale matrix.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mode : float or None
- The Mode of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._mode(dim, df, scale)
- return _squeeze_output(out) if out is not None else out
- def _var(self, dim, df, scale):
- """Variance of the Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'var' instead.
- """
- var = scale**2
- diag = scale.diagonal() # 1 x dim array
- var += np.outer(diag, diag)
- var *= df
- return var
- def var(self, df, scale):
- """Variance of the Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- var : float
- The variance of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._var(dim, df, scale)
- return _squeeze_output(out)
- def _standard_rvs(self, n, shape, dim, df, random_state):
- """
- Parameters
- ----------
- n : integer
- Number of variates to generate
- shape : iterable
- Shape of the variates to generate
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'rvs' instead.
- """
- # Random normal variates for off-diagonal elements
- n_tril = dim * (dim-1) // 2
- covariances = random_state.normal(
- size=n*n_tril).reshape(shape+(n_tril,))
- # Random chi-square variates for diagonal elements
- variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5
- for i in range(dim)]].reshape((dim,) +
- shape[::-1]).T)
- # Create the A matri(ces) - lower triangular
- A = np.zeros(shape + (dim, dim))
- # Input the covariances
- size_idx = tuple([slice(None, None, None)]*len(shape))
- tril_idx = np.tril_indices(dim, k=-1)
- A[size_idx + tril_idx] = covariances
- # Input the variances
- diag_idx = np.diag_indices(dim)
- A[size_idx + diag_idx] = variances
- return A
- def _rvs(self, n, shape, dim, df, C, random_state):
- """Draw random samples from a Wishart distribution.
- Parameters
- ----------
- n : integer
- Number of variates to generate
- shape : iterable
- Shape of the variates to generate
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- C : ndarray
- Cholesky factorization of the scale matrix, lower triangular.
- %(_doc_random_state)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'rvs' instead.
- """
- random_state = self._get_random_state(random_state)
- # Calculate the matrices A, which are actually lower triangular
- # Cholesky factorizations of a matrix B such that B ~ W(df, I)
- A = self._standard_rvs(n, shape, dim, df, random_state)
- # Calculate SA = C A A' C', where SA ~ W(df, scale)
- # Note: this is the product of a (lower) (lower) (lower)' (lower)'
- # or, denoting B = AA', it is C B C' where C is the lower
- # triangular Cholesky factorization of the scale matrix.
- # this appears to conflict with the instructions in [1]_, which
- # suggest that it should be D' B D where D is the lower
- # triangular factorization of the scale matrix. However, it is
- # meant to refer to the Bartlett (1933) representation of a
- # Wishart random variate as L A A' L' where L is lower triangular
- # so it appears that understanding D' to be upper triangular
- # is either a typo in or misreading of [1]_.
- for index in np.ndindex(shape):
- CA = np.dot(C, A[index])
- A[index] = np.dot(CA, CA.T)
- return A
- def rvs(self, df, scale, size=1, random_state=None):
- """Draw random samples from a Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- size : integer or iterable of integers, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray
- Random variates of shape (`size`) + (``dim``, ``dim``), where
- ``dim`` is the dimension of the scale matrix.
- Notes
- -----
- %(_doc_callparams_note)s
- """
- n, shape = self._process_size(size)
- dim, df, scale = self._process_parameters(df, scale)
- # Cholesky decomposition of scale
- C = scipy.linalg.cholesky(scale, lower=True)
- out = self._rvs(n, shape, dim, df, C, random_state)
- return _squeeze_output(out)
- def _entropy(self, dim, df, log_det_scale):
- """Compute the differential entropy of the Wishart.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- log_det_scale : float
- Logarithm of the determinant of the scale matrix
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'entropy' instead.
- """
- return (
- 0.5 * (dim+1) * log_det_scale +
- 0.5 * dim * (dim+1) * _LOG_2 +
- multigammaln(0.5*df, dim) -
- 0.5 * (df - dim - 1) * np.sum(
- [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]
- ) +
- 0.5 * df * dim
- )
- def entropy(self, df, scale):
- """Compute the differential entropy of the Wishart.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- h : scalar
- Entropy of the Wishart distribution
- Notes
- -----
- %(_doc_callparams_note)s
- """
- dim, df, scale = self._process_parameters(df, scale)
- _, log_det_scale = self._cholesky_logdet(scale)
- return self._entropy(dim, df, log_det_scale)
- def _cholesky_logdet(self, scale):
- """Compute Cholesky decomposition and determine (log(det(scale)).
- Parameters
- ----------
- scale : ndarray
- Scale matrix.
- Returns
- -------
- c_decomp : ndarray
- The Cholesky decomposition of `scale`.
- logdet : scalar
- The log of the determinant of `scale`.
- Notes
- -----
- This computation of ``logdet`` is equivalent to
- ``np.linalg.slogdet(scale)``. It is ~2x faster though.
- """
- c_decomp = scipy.linalg.cholesky(scale, lower=True)
- logdet = 2 * np.sum(np.log(c_decomp.diagonal()))
- return c_decomp, logdet
- wishart = wishart_gen()
- class wishart_frozen(multi_rv_frozen):
- """Create a frozen Wishart distribution.
- Parameters
- ----------
- df : array_like
- Degrees of freedom of the distribution
- scale : array_like
- Scale matrix of the distribution
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance then
- that instance is used.
- """
- __class_getitem__ = None
- def __init__(self, df, scale, seed=None):
- self._dist = wishart_gen(seed)
- self.dim, self.df, self.scale = self._dist._process_parameters(
- df, scale)
- self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)
- def logpdf(self, x):
- x = self._dist._process_quantiles(x, self.dim)
- out = self._dist._logpdf(x, self.dim, self.df, self.scale,
- self.log_det_scale, self.C)
- return _squeeze_output(out)
- def pdf(self, x):
- return np.exp(self.logpdf(x))
- def mean(self):
- out = self._dist._mean(self.dim, self.df, self.scale)
- return _squeeze_output(out)
- def mode(self):
- out = self._dist._mode(self.dim, self.df, self.scale)
- return _squeeze_output(out) if out is not None else out
- def var(self):
- out = self._dist._var(self.dim, self.df, self.scale)
- return _squeeze_output(out)
- def rvs(self, size=1, random_state=None):
- n, shape = self._dist._process_size(size)
- out = self._dist._rvs(n, shape, self.dim, self.df,
- self.C, random_state)
- return _squeeze_output(out)
- def entropy(self):
- return self._dist._entropy(self.dim, self.df, self.log_det_scale)
- # Set frozen generator docstrings from corresponding docstrings in
- # Wishart and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:
- method = wishart_gen.__dict__[name]
- method_frozen = wishart_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, wishart_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
- class invwishart_gen(wishart_gen):
- r"""An inverse Wishart random variable.
- The `df` keyword specifies the degrees of freedom. The `scale` keyword
- specifies the scale matrix, which must be symmetric and positive definite.
- In this context, the scale matrix is often interpreted in terms of a
- multivariate normal covariance matrix.
- Methods
- -------
- pdf(x, df, scale)
- Probability density function.
- logpdf(x, df, scale)
- Log of the probability density function.
- rvs(df, scale, size=1, random_state=None)
- Draw random samples from an inverse Wishart distribution.
- entropy(df, scale)
- Differential entropy of the distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- %(_doc_random_state)s
- Raises
- ------
- scipy.linalg.LinAlgError
- If the scale matrix `scale` is not positive definite.
- See Also
- --------
- wishart
- Notes
- -----
- %(_doc_callparams_note)s
- The scale matrix `scale` must be a symmetric positive definite
- matrix. Singular matrices, including the symmetric positive semi-definite
- case, are not supported. Symmetry is not checked; only the lower triangular
- portion is used.
- The inverse Wishart distribution is often denoted
- .. math::
- W_p^{-1}(\nu, \Psi)
- where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the
- :math:`p \times p` scale matrix.
- The probability density function for `invwishart` has support over positive
- definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,
- then its PDF is given by:
- .. math::
- f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }
- |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}
- \exp\left( -tr(\Sigma S^{-1}) / 2 \right)
- If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then
- :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).
- If the scale matrix is 1-dimensional and equal to one, then the inverse
- Wishart distribution :math:`W_1(\nu, 1)` collapses to the
- inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`
- and scale = :math:`\frac{1}{2}`.
- Instead of inverting a randomly generated Wishart matrix as described in [2],
- here the algorithm in [4] is used to directly generate a random inverse-Wishart
- matrix without inversion.
- .. versionadded:: 0.16.0
- References
- ----------
- .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
- Wiley, 1983.
- .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications
- in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,
- 1985.
- .. [3] Gupta, M. and Srivastava, S. "Parametric Bayesian Estimation of
- Differential Entropy and Relative Entropy". Entropy 12, 818 - 843.
- 2010.
- .. [4] S.D. Axen, "Efficiently generating inverse-Wishart matrices and
- their Cholesky factors", :arXiv:`2310.15884v1`. 2023.
- Examples
- --------
- >>> import numpy as np
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import invwishart, invgamma
- >>> x = np.linspace(0.01, 1, 100)
- >>> iw = invwishart.pdf(x, df=6, scale=1)
- >>> iw[:3]
- array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
- >>> ig = invgamma.pdf(x, 6/2., scale=1./2)
- >>> ig[:3]
- array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
- >>> plt.plot(x, iw)
- >>> plt.show()
- The input quantiles can be any shape of array, as long as the last
- axis labels the components.
- Alternatively, the object may be called (as a function) to fix the degrees
- of freedom and scale parameters, returning a "frozen" inverse Wishart
- random variable:
- >>> rv = invwishart(df=1, scale=1)
- >>> # Frozen object with the same methods but holding the given
- >>> # degrees of freedom and scale fixed.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
- def __call__(self, df=None, scale=None, seed=None):
- """Create a frozen inverse Wishart distribution.
- See `invwishart_frozen` for more information.
- """
- return invwishart_frozen(df, scale, seed)
- def _logpdf(self, x, dim, df, log_det_scale, C):
- """Log of the inverse Wishart probability density function.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the log of the probability
- density function.
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- log_det_scale : float
- Logarithm of the determinant of the scale matrix
- C : ndarray
- Cholesky factorization of the scale matrix, lower triangular.
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- # Retrieve tr(scale x^{-1})
- log_det_x = np.empty(x.shape[-1])
- tr_scale_x_inv = np.empty(x.shape[-1])
- trsm = get_blas_funcs(('trsm'), (x,))
- if dim > 1:
- for i in range(x.shape[-1]):
- Cx, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
- A = trsm(1., Cx, C, side=0, lower=True)
- tr_scale_x_inv[i] = np.linalg.norm(A)**2
- else:
- log_det_x[:] = np.log(x[0, 0])
- tr_scale_x_inv[:] = C[0, 0]**2 / x[0, 0]
- # Log PDF
- out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
- (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
- multigammaln(0.5*df, dim))
- return out
- def logpdf(self, x, df, scale):
- """Log of the inverse Wishart probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- Each quantile must be a symmetric positive definite matrix.
- %(_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Log of the probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- dim, df, scale = self._process_parameters(df, scale)
- x = self._process_quantiles(x, dim)
- C, log_det_scale = self._cholesky_logdet(scale)
- out = self._logpdf(x, dim, df, log_det_scale, C)
- return _squeeze_output(out)
- def pdf(self, x, df, scale):
- """Inverse Wishart probability density function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- Each quantile must be a symmetric positive definite matrix.
- %(_doc_default_callparams)s
- Returns
- -------
- pdf : ndarray
- Probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- return np.exp(self.logpdf(x, df, scale))
- def _mean(self, dim, df, scale):
- """Mean of the inverse Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'mean' instead.
- """
- if df > dim + 1:
- out = scale / (df - dim - 1)
- else:
- out = None
- return out
- def mean(self, df, scale):
- """Mean of the inverse Wishart distribution.
- Only valid if the degrees of freedom are greater than the dimension of
- the scale matrix plus one.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mean : float or None
- The mean of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._mean(dim, df, scale)
- return _squeeze_output(out) if out is not None else out
- def _mode(self, dim, df, scale):
- """Mode of the inverse Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'mode' instead.
- """
- return scale / (df + dim + 1)
- def mode(self, df, scale):
- """Mode of the inverse Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mode : float
- The Mode of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._mode(dim, df, scale)
- return _squeeze_output(out)
- def _var(self, dim, df, scale):
- """Variance of the inverse Wishart distribution.
- Parameters
- ----------
- dim : int
- Dimension of the scale matrix
- %(_doc_default_callparams)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'var' instead.
- """
- if df > dim + 3:
- var = (df - dim + 1) * scale**2
- diag = scale.diagonal() # 1 x dim array
- var += (df - dim - 1) * np.outer(diag, diag)
- var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)
- else:
- var = None
- return var
- def var(self, df, scale):
- """Variance of the inverse Wishart distribution.
- Only valid if the degrees of freedom are greater than the dimension of
- the scale matrix plus three.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- var : float
- The variance of the distribution
- """
- dim, df, scale = self._process_parameters(df, scale)
- out = self._var(dim, df, scale)
- return _squeeze_output(out) if out is not None else out
- def _inv_standard_rvs(self, n, shape, dim, df, random_state):
- """
- Parameters
- ----------
- n : integer
- Number of variates to generate
- shape : iterable
- Shape of the variates to generate
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Returns
- -------
- A : ndarray
- Random variates of shape (`shape`) + (``dim``, ``dim``).
- Each slice `A[..., :, :]` is lower-triangular, and its
- inverse is the lower Cholesky factor of a draw from
- `invwishart(df, np.eye(dim))`.
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'rvs' instead.
- """
- A = np.zeros(shape + (dim, dim))
- # Random normal variates for off-diagonal elements
- tri_rows, tri_cols = np.tril_indices(dim, k=-1)
- n_tril = dim * (dim-1) // 2
- A[..., tri_rows, tri_cols] = random_state.normal(
- size=(*shape, n_tril),
- )
- # Random chi variates for diagonal elements
- rows = np.arange(dim)
- chi_dfs = (df - dim + 1) + rows
- A[..., rows, rows] = random_state.chisquare(
- df=chi_dfs, size=(*shape, dim),
- )**0.5
- return A
- def _rvs(self, n, shape, dim, df, C, random_state):
- """Draw random samples from an inverse Wishart distribution.
- Parameters
- ----------
- n : integer
- Number of variates to generate
- shape : iterable
- Shape of the variates to generate
- dim : int
- Dimension of the scale matrix
- df : int
- Degrees of freedom
- C : ndarray
- Cholesky factorization of the scale matrix, lower triangular.
- %(_doc_random_state)s
- Notes
- -----
- As this function does no argument checking, it should not be
- called directly; use 'rvs' instead.
- """
- random_state = self._get_random_state(random_state)
- # Get random draws A such that inv(A) ~ iW(df, I)
- A = self._inv_standard_rvs(n, shape, dim, df, random_state)
- # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
- trsm = get_blas_funcs(('trsm'), (A,))
- trmm = get_blas_funcs(('trmm'), (A,))
- for index in np.ndindex(A.shape[:-2]):
- if dim > 1:
- # Calculate CA
- # Get CA = C A^{-1} via triangular solver
- CA = trsm(1., A[index], C, side=1, lower=True)
- # get SA
- A[index] = trmm(1., CA, CA, side=1, lower=True, trans_a=True)
- else:
- A[index][0, 0] = (C[0, 0] / A[index][0, 0])**2
- return A
- def rvs(self, df, scale, size=1, random_state=None):
- """Draw random samples from an inverse Wishart distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- size : integer or iterable of integers, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray
- Random variates of shape (`size`) + (``dim``, ``dim``), where
- ``dim`` is the dimension of the scale matrix.
- Notes
- -----
- %(_doc_callparams_note)s
- """
- n, shape = self._process_size(size)
- dim, df, scale = self._process_parameters(df, scale)
- # Cholesky decomposition of scale
- C = scipy.linalg.cholesky(scale, lower=True)
- out = self._rvs(n, shape, dim, df, C, random_state)
- return _squeeze_output(out)
- def _entropy(self, dim, df, log_det_scale):
- # reference: eq. (17) from ref. 3
- psi_eval_points = [0.5 * (df - dim + i) for i in range(1, dim + 1)]
- psi_eval_points = np.asarray(psi_eval_points)
- return multigammaln(0.5 * df, dim) + 0.5 * dim * df + \
- 0.5 * (dim + 1) * (log_det_scale - _LOG_2) - \
- 0.5 * (df + dim + 1) * \
- psi(psi_eval_points, out=psi_eval_points).sum()
- def entropy(self, df, scale):
- dim, df, scale = self._process_parameters(df, scale)
- _, log_det_scale = self._cholesky_logdet(scale)
- return self._entropy(dim, df, log_det_scale)
- invwishart = invwishart_gen()
- class invwishart_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, df, scale, seed=None):
- """Create a frozen inverse Wishart distribution.
- Parameters
- ----------
- df : array_like
- Degrees of freedom of the distribution
- scale : array_like
- Scale matrix of the distribution
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is None the `numpy.random.Generator` singleton is used.
- If `seed` is an int, a new ``Generator`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` instance then that instance is
- used.
- """
- self._dist = invwishart_gen(seed)
- self.dim, self.df, self.scale = self._dist._process_parameters(
- df, scale
- )
- # Get the determinant via Cholesky factorization
- self.C = scipy.linalg.cholesky(self.scale, lower=True)
- self.log_det_scale = 2 * np.sum(np.log(self.C.diagonal()))
- def logpdf(self, x):
- x = self._dist._process_quantiles(x, self.dim)
- out = self._dist._logpdf(x, self.dim, self.df,
- self.log_det_scale, self.C)
- return _squeeze_output(out)
- def pdf(self, x):
- return np.exp(self.logpdf(x))
- def mean(self):
- out = self._dist._mean(self.dim, self.df, self.scale)
- return _squeeze_output(out) if out is not None else out
- def mode(self):
- out = self._dist._mode(self.dim, self.df, self.scale)
- return _squeeze_output(out)
- def var(self):
- out = self._dist._var(self.dim, self.df, self.scale)
- return _squeeze_output(out) if out is not None else out
- def rvs(self, size=1, random_state=None):
- n, shape = self._dist._process_size(size)
- out = self._dist._rvs(n, shape, self.dim, self.df,
- self.C, random_state)
- return _squeeze_output(out)
- def entropy(self):
- return self._dist._entropy(self.dim, self.df, self.log_det_scale)
- # Set frozen generator docstrings from corresponding docstrings in
- # inverse Wishart and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:
- method = invwishart_gen.__dict__[name]
- method_frozen = wishart_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, wishart_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
- _multinomial_doc_default_callparams = """\
- n : int
- Number of trials
- p : array_like
- Probability of a trial falling into each category; should sum to 1
- """
- _multinomial_doc_callparams_note = """\
- `n` should be a nonnegative integer. Each element of `p` should be in the
- interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to
- 1, the last element of the `p` array is not used and is replaced with the
- remaining probability left over from the earlier elements.
- """
- _multinomial_doc_frozen_callparams = ""
- _multinomial_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- multinomial_docdict_params = {
- '_doc_default_callparams': _multinomial_doc_default_callparams,
- '_doc_callparams_note': _multinomial_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- multinomial_docdict_noparams = {
- '_doc_default_callparams': _multinomial_doc_frozen_callparams,
- '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class multinomial_gen(multi_rv_generic):
- r"""A multinomial random variable.
- Methods
- -------
- pmf(x, n, p)
- Probability mass function.
- logpmf(x, n, p)
- Log of the probability mass function.
- rvs(n, p, size=1, random_state=None)
- Draw random samples from a multinomial distribution.
- entropy(n, p)
- Compute the entropy of the multinomial distribution.
- cov(n, p)
- Compute the covariance matrix of the multinomial distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_doc_callparams_note)s
- The probability mass function for `multinomial` is
- .. math::
- f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},
- supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a
- nonnegative integer and their sum is :math:`n`.
- .. versionadded:: 0.19.0
- Examples
- --------
- >>> from scipy.stats import multinomial
- >>> rv = multinomial(8, [0.3, 0.2, 0.5])
- >>> rv.pmf([1, 3, 4])
- 0.042000000000000072
- The multinomial distribution for :math:`k=2` is identical to the
- corresponding binomial distribution (tiny numerical differences
- notwithstanding):
- >>> from scipy.stats import binom
- >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])
- 0.29030399999999973
- >>> binom.pmf(3, 7, 0.4)
- 0.29030400000000012
- The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support
- broadcasting, under the convention that the vector parameters (``x`` and
- ``p``) are interpreted as if each row along the last axis is a single
- object. For instance:
- >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
- array([0.2268945, 0.25412184])
- Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,
- but following the rules mentioned above they behave as if the rows
- ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single
- object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and
- ``p.shape = ()``. To obtain the individual elements without broadcasting,
- we would do this:
- >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])
- 0.2268945
- >>> multinomial.pmf([3, 5], 8, p=[.3, .7])
- 0.25412184
- This broadcasting also works for ``cov``, where the output objects are
- square matrices of size ``p.shape[-1]``. For example:
- >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
- array([[[ 0.84, -0.84],
- [-0.84, 0.84]],
- [[ 1.2 , -1.2 ],
- [-1.2 , 1.2 ]]])
- In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and
- following the rules above, these broadcast as if ``p.shape == (2,)``.
- Thus the result should also be of shape ``(2,)``, but since each output is
- a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,
- where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and
- ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.
- Alternatively, the object may be called (as a function) to fix the `n` and
- `p` parameters, returning a "frozen" multinomial random variable:
- >>> rv = multinomial(n=7, p=[.3, .7])
- >>> # Frozen object with the same methods but holding the given
- >>> # degrees of freedom and scale fixed.
- See also
- --------
- scipy.stats.binom : The binomial distribution.
- numpy.random.Generator.multinomial : Sampling from the multinomial distribution.
- scipy.stats.multivariate_hypergeom :
- The multivariate hypergeometric distribution.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = \
- doccer.docformat(self.__doc__, multinomial_docdict_params)
- def __call__(self, n, p, seed=None):
- """Create a frozen multinomial distribution.
- See `multinomial_frozen` for more information.
- """
- return multinomial_frozen(n, p, seed)
- def _process_parameters(self, n, p):
- """Returns: n_, p_, npcond.
- n_ and p_ are arrays of the correct shape; npcond is a boolean array
- flagging values out of the domain.
- """
- eps = np.finfo(np.result_type(np.asarray(p), np.float32)).eps * 10
- p = np.array(p, dtype=np.float64, copy=True)
- p_adjusted = 1. - p[..., :-1].sum(axis=-1)
- # only make adjustment when it's significant
- i_adjusted = np.abs(1 - p.sum(axis=-1)) > eps
- p[i_adjusted, -1] = p_adjusted[i_adjusted]
- if np.any(i_adjusted):
- message = ("Some rows of `p` do not sum to 1.0 within tolerance of "
- f"{eps=}. Currently, the last element of these rows is adjusted "
- "to compensate, but this condition will produce NaNs "
- "beginning in SciPy 1.18.0. Please ensure that rows of `p` sum "
- "to 1.0 to avoid futher disruption.")
- warnings.warn(message, FutureWarning, stacklevel=3)
- # true for bad p
- pcond = np.any(p < 0, axis=-1)
- pcond |= np.any(p > 1, axis=-1)
- n = np.array(n, dtype=int, copy=True)
- # true for bad n
- ncond = n < 0
- return n, p, ncond | pcond
- def _process_quantiles(self, x, n, p):
- """Returns: x_, xcond.
- x_ is an int array; xcond is a boolean array flagging values out of the
- domain.
- """
- xx = np.asarray(x, dtype=int)
- if xx.ndim == 0:
- raise ValueError("x must be an array.")
- if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:
- raise ValueError(f"Size of each quantile should be size of p: "
- f"received {xx.shape[-1]}, but expected "
- f"{p.shape[-1]}.")
- # true for x out of the domain
- cond = np.any(xx != x, axis=-1)
- cond |= np.any(xx < 0, axis=-1)
- cond = cond | (np.sum(xx, axis=-1) != n)
- return xx, cond
- def _checkresult(self, result, cond, bad_value):
- result = np.asarray(result)
- if cond.ndim != 0:
- result[cond] = bad_value
- elif cond:
- if result.ndim == 0:
- return bad_value
- result[...] = bad_value
- return result
- def _logpmf(self, x, n, p):
- return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)
- def logpmf(self, x, n, p):
- """Log of the Multinomial probability mass function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_doc_default_callparams)s
- Returns
- -------
- logpmf : ndarray or scalar
- Log of the probability mass function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- n, p, npcond = self._process_parameters(n, p)
- x, xcond = self._process_quantiles(x, n, p)
- result = self._logpmf(x, n, p)
- # replace values for which x was out of the domain; broadcast
- # xcond to the right shape
- xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)
- result = self._checkresult(result, xcond_, -np.inf)
- # replace values bad for n or p; broadcast npcond to the right shape
- npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)
- return self._checkresult(result, npcond_, np.nan)
- def pmf(self, x, n, p):
- """Multinomial probability mass function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_doc_default_callparams)s
- Returns
- -------
- pmf : ndarray or scalar
- Probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- return np.exp(self.logpmf(x, n, p))
- def mean(self, n, p):
- """Mean of the Multinomial distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mean : float
- The mean of the distribution
- """
- n, p, npcond = self._process_parameters(n, p)
- result = n[..., np.newaxis]*p
- return self._checkresult(result, npcond, np.nan)
- def cov(self, n, p):
- """Covariance matrix of the multinomial distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- cov : ndarray
- The covariance matrix of the distribution
- """
- n, p, npcond = self._process_parameters(n, p)
- nn = n[..., np.newaxis, np.newaxis]
- result = nn * np.einsum('...j,...k->...jk', -p, p)
- # change the diagonal
- for i in range(p.shape[-1]):
- result[..., i, i] += n*p[..., i]
- return self._checkresult(result, npcond, np.nan)
- def entropy(self, n, p):
- r"""Compute the entropy of the multinomial distribution.
- The entropy is computed using this expression:
- .. math::
- f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +
- \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- h : scalar
- Entropy of the Multinomial distribution
- Notes
- -----
- %(_doc_callparams_note)s
- """
- n, p, npcond = self._process_parameters(n, p)
- x = np.r_[1:np.max(n)+1]
- term1 = n*np.sum(entr(p), axis=-1)
- term1 -= gammaln(n+1)
- n = n[..., np.newaxis]
- new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1
- new_shape = x.shape + (1,)*new_axes_needed
- x = x.reshape(new_shape)
- term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),
- axis=(-1, -1-new_axes_needed))
- return self._checkresult(term1 + term2, npcond, np.nan)
- def rvs(self, n, p, size=None, random_state=None):
- """Draw random samples from a Multinomial distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- size : integer or iterable of integers, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of shape (`size`, `len(p)`)
- Notes
- -----
- %(_doc_callparams_note)s
- """
- n, p, npcond = self._process_parameters(n, p)
- random_state = self._get_random_state(random_state)
- return random_state.multinomial(n, p, size)
- multinomial = multinomial_gen()
- class multinomial_frozen(multi_rv_frozen):
- r"""Create a frozen Multinomial distribution.
- Parameters
- ----------
- n : int
- number of trials
- p: array_like
- probability of a trial falling into each category; should sum to 1
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance then
- that instance is used.
- """
- def __init__(self, n, p, seed=None):
- self._dist = multinomial_gen(seed)
- self.n, self.p, self.npcond = self._dist._process_parameters(n, p)
- # monkey patch self._dist
- def _process_parameters(n, p):
- return self.n, self.p, self.npcond
- self._dist._process_parameters = _process_parameters
- def logpmf(self, x):
- return self._dist.logpmf(x, self.n, self.p)
- def pmf(self, x):
- return self._dist.pmf(x, self.n, self.p)
- def mean(self):
- return self._dist.mean(self.n, self.p)
- def cov(self):
- return self._dist.cov(self.n, self.p)
- def entropy(self):
- return self._dist.entropy(self.n, self.p)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.n, self.p, size, random_state)
- # Set frozen generator docstrings from corresponding docstrings in
- # multinomial and fill in default strings in class docstrings
- for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:
- method = multinomial_gen.__dict__[name]
- method_frozen = multinomial_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, multinomial_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__,
- multinomial_docdict_params)
- class special_ortho_group_gen(multi_rv_generic):
- r"""A Special Orthogonal matrix (SO(N)) random variable.
- Return a random rotation matrix, drawn from the Haar distribution
- (the only uniform distribution on SO(N)) with a determinant of +1.
- The `dim` keyword specifies the dimension N.
- Methods
- -------
- rvs(dim=None, size=1, random_state=None)
- Draw random samples from SO(N).
- Parameters
- ----------
- dim : scalar
- Dimension of matrices
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Notes
- -----
- The ``rvs`` method returns a random rotation matrix drawn from the Haar
- distribution, the only uniform distribution on SO(N). The algorithm generates
- a Haar-distributed orthogonal matrix in O(N) using the ``rvs`` method of
- `ortho_group`, then adjusts the matrix to ensure that the determinant is +1.
- For a random rotation in three dimensions, see
- `scipy.spatial.transform.Rotation.random`.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import special_ortho_group
- >>> x = special_ortho_group.rvs(3)
- >>> np.dot(x, x.T)
- array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
- [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
- [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
- >>> import scipy.linalg
- >>> scipy.linalg.det(x)
- 1.0
- This generates one random matrix from SO(3). It is orthogonal and
- has a determinant of 1.
- Alternatively, the object may be called (as a function) to fix the `dim`
- parameter, returning a "frozen" special_ortho_group random variable:
- >>> rv = special_ortho_group(5)
- >>> # Frozen object with the same methods but holding the
- >>> # dimension parameter fixed.
- See Also
- --------
- ortho_group, scipy.spatial.transform.Rotation.random
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__)
- def __call__(self, dim=None, seed=None):
- """Create a frozen SO(N) distribution.
- See `special_ortho_group_frozen` for more information.
- """
- return special_ortho_group_frozen(dim, seed=seed)
- def _process_parameters(self, dim):
- """Dimension N must be specified; it cannot be inferred."""
- if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
- raise ValueError("""Dimension of rotation must be specified,
- and must be a scalar nonnegative integer.""")
- return dim
- def rvs(self, dim, size=1, random_state=None):
- """Draw random samples from SO(N).
- Parameters
- ----------
- dim : integer
- Dimension of rotation space (N).
- size : integer, optional
- Number of samples to draw (default 1).
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim)
- """
- random_state = self._get_random_state(random_state)
- q = ortho_group.rvs(dim, size, random_state)
- dets = np.linalg.det(q)
- if dim:
- q[..., 0, :] /= dets[..., np.newaxis]
- return q
- special_ortho_group = special_ortho_group_gen()
- class special_ortho_group_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, dim=None, seed=None):
- """Create a frozen SO(N) distribution.
- Parameters
- ----------
- dim : scalar
- Dimension of matrices
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Examples
- --------
- >>> from scipy.stats import special_ortho_group
- >>> g = special_ortho_group(5)
- >>> x = g.rvs()
- """ # numpy/numpydoc#87 # noqa: E501
- self._dist = special_ortho_group_gen(seed)
- self.dim = self._dist._process_parameters(dim)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.dim, size, random_state)
- class ortho_group_gen(multi_rv_generic):
- r"""An Orthogonal matrix (O(N)) random variable.
- Return a random orthogonal matrix, drawn from the O(N) Haar
- distribution (the only uniform distribution on O(N)).
- The `dim` keyword specifies the dimension N.
- Methods
- -------
- rvs(dim=None, size=1, random_state=None)
- Draw random samples from O(N).
- Parameters
- ----------
- dim : scalar
- Dimension of matrices
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Notes
- -----
- This class is closely related to `special_ortho_group`.
- Some care is taken to avoid numerical error, as per the paper by Mezzadri.
- References
- ----------
- .. [1] F. Mezzadri, "How to generate random matrices from the classical
- compact groups", :arXiv:`math-ph/0609050v2`.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import ortho_group
- >>> x = ortho_group.rvs(3)
- >>> np.dot(x, x.T)
- array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
- [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
- [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
- >>> import scipy.linalg
- >>> np.fabs(scipy.linalg.det(x))
- 1.0
- This generates one random matrix from O(3). It is orthogonal and
- has a determinant of +1 or -1.
- Alternatively, the object may be called (as a function) to fix the `dim`
- parameter, returning a "frozen" ortho_group random variable:
- >>> rv = ortho_group(5)
- >>> # Frozen object with the same methods but holding the
- >>> # dimension parameter fixed.
- See Also
- --------
- special_ortho_group
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__)
- def __call__(self, dim=None, seed=None):
- """Create a frozen O(N) distribution.
- See `ortho_group_frozen` for more information.
- """
- return ortho_group_frozen(dim, seed=seed)
- def _process_parameters(self, dim):
- """Dimension N must be specified; it cannot be inferred."""
- if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
- raise ValueError("Dimension of rotation must be specified,"
- "and must be a scalar nonnegative integer.")
- return dim
- def rvs(self, dim, size=1, random_state=None):
- """Draw random samples from O(N).
- Parameters
- ----------
- dim : integer
- Dimension of rotation space (N).
- size : integer, optional
- Number of samples to draw (default 1).
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim)
- """
- random_state = self._get_random_state(random_state)
- size = int(size)
- dim = self._process_parameters(dim)
- size = (size,) if size > 1 else ()
- z = random_state.normal(size=size + (dim, dim))
- q, r = np.linalg.qr(z)
- # The last two dimensions are the rows and columns of R matrices.
- # Extract the diagonals. Note that this eliminates a dimension.
- d = r.diagonal(offset=0, axis1=-2, axis2=-1)
- # Add back a dimension for proper broadcasting: we're dividing
- # each row of each R matrix by the diagonal of the R matrix.
- q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
- return q
- ortho_group = ortho_group_gen()
- class ortho_group_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, dim=None, seed=None):
- """Create a frozen O(N) distribution.
- Parameters
- ----------
- dim : scalar
- Dimension of matrices
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Examples
- --------
- >>> from scipy.stats import ortho_group
- >>> g = ortho_group(5)
- >>> x = g.rvs()
- """ # numpy/numpydoc#87 # noqa: E501
- self._dist = ortho_group_gen(seed)
- self.dim = self._dist._process_parameters(dim)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.dim, size, random_state)
- class random_correlation_gen(multi_rv_generic):
- r"""A random correlation matrix.
- Return a random correlation matrix, given a vector of eigenvalues.
- The returned matrix is symmetric positive semidefinite with unit diagonal.
- The `eigs` keyword specifies the eigenvalues of the correlation matrix,
- and implies the dimension.
- Methods
- -------
- rvs(eigs=None, random_state=None)
- Draw random correlation matrices, all with eigenvalues eigs.
- Parameters
- ----------
- eigs : 1d ndarray
- Eigenvalues of correlation matrix. All eigenvalues need to be non-negative and
- need to sum to the number of eigenvalues.
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- tol : float, optional
- Tolerance for input parameter checks
- diag_tol : float, optional
- Tolerance for deviation of the diagonal of the resulting
- matrix. Default: 1e-7
- Raises
- ------
- RuntimeError
- Floating point error prevented generating a valid correlation
- matrix.
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim),
- each having eigenvalues eigs.
- Notes
- -----
- Generates a random correlation matrix following a numerically stable
- algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)
- similarity transformation to construct a symmetric positive semi-definite
- matrix, and applies a series of Givens rotations to scale it to have ones
- on the diagonal.
- References
- ----------
- .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation
- of correlation matrices and their factors", BIT 2000, Vol. 40,
- No. 4, pp. 640 651
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import random_correlation
- >>> rng = np.random.default_rng()
- >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng)
- >>> x
- array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ],
- [-0.02423399, 1. , 0.20334736, 0.04039817],
- [ 0.03130519, 0.20334736, 1. , 0.02694275],
- [ 0.4946965 , 0.04039817, 0.02694275, 1. ]])
- >>> import scipy.linalg
- >>> e, v = scipy.linalg.eigh(x)
- >>> e
- array([ 0.5, 0.8, 1.2, 1.5])
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__)
- def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
- """Create a frozen random correlation matrix.
- See `random_correlation_frozen` for more information.
- """
- return random_correlation_frozen(eigs, seed=seed, tol=tol,
- diag_tol=diag_tol)
- def _process_parameters(self, eigs, tol):
- eigs = np.asarray(eigs, dtype=float)
- dim = eigs.size
- if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:
- raise ValueError("Array 'eigs' must be a vector of length "
- "greater than 1.")
- if np.fabs(np.sum(eigs) - dim) > tol:
- raise ValueError("Sum of eigenvalues must equal dimensionality.")
- for x in eigs:
- if x < -tol:
- raise ValueError("All eigenvalues must be non-negative.")
- return dim, eigs
- def _givens_to_1(self, aii, ajj, aij):
- """Computes a 2x2 Givens matrix to put 1's on the diagonal.
- The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].
- The output matrix g is a 2x2 anti-symmetric matrix of the form
- [ c s ; -s c ]; the elements c and s are returned.
- Applying the output matrix to the input matrix (as b=g.T M g)
- results in a matrix with bii=1, provided tr(M) - det(M) >= 1
- and floating point issues do not occur. Otherwise, some other
- valid rotation is returned. When tr(M)==2, also bjj=1.
- """
- aiid = aii - 1.
- ajjd = ajj - 1.
- if ajjd == 0:
- # ajj==1, so swap aii and ajj to avoid division by zero
- return 0., 1.
- dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))
- # The choice of t should be chosen to avoid cancellation [1]
- t = (aij + math.copysign(dd, aij)) / ajjd
- c = 1. / math.sqrt(1. + t*t)
- if c == 0:
- # Underflow
- s = 1.0
- else:
- s = c*t
- return c, s
- def _to_corr(self, m):
- """
- Given a psd matrix m, rotate to put one's on the diagonal, turning it
- into a correlation matrix. This also requires the trace equal the
- dimensionality. Note: modifies input matrix
- """
- # Check requirements for in-place Givens
- if not (m.flags.c_contiguous and m.dtype == np.float64 and
- m.shape[0] == m.shape[1]):
- raise ValueError()
- d = m.shape[0]
- for i in range(d-1):
- if m[i, i] == 1:
- continue
- elif m[i, i] > 1:
- for j in range(i+1, d):
- if m[j, j] < 1:
- break
- else:
- for j in range(i+1, d):
- if m[j, j] > 1:
- break
- c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])
- # Use BLAS to apply Givens rotations in-place. Equivalent to:
- # g = np.eye(d)
- # g[i, i] = g[j,j] = c
- # g[j, i] = -s; g[i, j] = s
- # m = np.dot(g.T, np.dot(m, g))
- mv = m.ravel()
- drot(mv, mv, c, -s, n=d,
- offx=i*d, incx=1, offy=j*d, incy=1,
- overwrite_x=True, overwrite_y=True)
- drot(mv, mv, c, -s, n=d,
- offx=i, incx=d, offy=j, incy=d,
- overwrite_x=True, overwrite_y=True)
- return m
- def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):
- """Draw random correlation matrices.
- Parameters
- ----------
- eigs : 1d ndarray
- Eigenvalues of correlation matrix
- tol : float, optional
- Tolerance for input parameter checks
- diag_tol : float, optional
- Tolerance for deviation of the diagonal of the resulting
- matrix. Default: 1e-7
- Raises
- ------
- RuntimeError
- Floating point error prevented generating a valid correlation
- matrix.
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim),
- each having eigenvalues eigs.
- """
- dim, eigs = self._process_parameters(eigs, tol=tol)
- random_state = self._get_random_state(random_state)
- m = ortho_group.rvs(dim, random_state=random_state)
- m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m
- m = self._to_corr(m) # Carefully rotate to unit diagonal
- # Check diagonal
- if abs(m.diagonal() - 1).max() > diag_tol:
- raise RuntimeError("Failed to generate a valid correlation matrix")
- return m
- random_correlation = random_correlation_gen()
- class random_correlation_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
- """Create a frozen random correlation matrix distribution.
- Parameters
- ----------
- eigs : 1d ndarray
- Eigenvalues of correlation matrix
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- tol : float, optional
- Tolerance for input parameter checks
- diag_tol : float, optional
- Tolerance for deviation of the diagonal of the resulting
- matrix. Default: 1e-7
- Raises
- ------
- RuntimeError
- Floating point error prevented generating a valid correlation
- matrix.
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim),
- each having eigenvalues eigs.
- """ # numpy/numpydoc#87 # noqa: E501
- self._dist = random_correlation_gen(seed)
- self.tol = tol
- self.diag_tol = diag_tol
- _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol)
- def rvs(self, random_state=None):
- return self._dist.rvs(self.eigs, random_state=random_state,
- tol=self.tol, diag_tol=self.diag_tol)
- class unitary_group_gen(multi_rv_generic):
- r"""A matrix-valued U(N) random variable.
- Return a random unitary matrix.
- The `dim` keyword specifies the dimension N.
- Methods
- -------
- rvs(dim=None, size=1, random_state=None)
- Draw random samples from U(N).
- Parameters
- ----------
- dim : scalar
- Dimension of matrices.
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Notes
- -----
- This class is similar to `ortho_group`.
- References
- ----------
- .. [1] F. Mezzadri, "How to generate random matrices from the classical
- compact groups", :arXiv:`math-ph/0609050v2`.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import unitary_group
- >>> x = unitary_group.rvs(3)
- >>> np.dot(x, x.conj().T)
- array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
- [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
- [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) # may vary
- This generates one random matrix from U(3). The dot product confirms that
- it is unitary up to machine precision.
- Alternatively, the object may be called (as a function) to fix the `dim`
- parameter, return a "frozen" unitary_group random variable:
- >>> rv = unitary_group(5)
- See Also
- --------
- ortho_group
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__)
- def __call__(self, dim=None, seed=None):
- """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
- See `unitary_group_frozen` for more information.
- """
- return unitary_group_frozen(dim, seed=seed)
- def _process_parameters(self, dim):
- """Dimension N must be specified; it cannot be inferred."""
- if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
- raise ValueError("Dimension of rotation must be specified,"
- "and must be a scalar nonnegative integer.")
- return dim
- def rvs(self, dim, size=1, random_state=None):
- """Draw random samples from U(N).
- Parameters
- ----------
- dim : integer
- Dimension of space (N).
- size : integer, optional
- Number of samples to draw (default 1).
- Returns
- -------
- rvs : ndarray or scalar
- Random size N-dimensional matrices, dimension (size, dim, dim)
- """
- random_state = self._get_random_state(random_state)
- size = int(size)
- dim = self._process_parameters(dim)
- size = (size,) if size > 1 else ()
- z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) +
- 1j*random_state.normal(size=size + (dim, dim)))
- q, r = np.linalg.qr(z)
- # The last two dimensions are the rows and columns of R matrices.
- # Extract the diagonals. Note that this eliminates a dimension.
- d = r.diagonal(offset=0, axis1=-2, axis2=-1)
- # Add back a dimension for proper broadcasting: we're dividing
- # each row of each R matrix by the diagonal of the R matrix.
- q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
- return q
- unitary_group = unitary_group_gen()
- class unitary_group_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, dim=None, seed=None):
- """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
- Parameters
- ----------
- dim : scalar
- Dimension of matrices
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Examples
- --------
- >>> from scipy.stats import unitary_group
- >>> x = unitary_group(3)
- >>> x.rvs()
- """ # numpy/numpydoc#87 # noqa: E501
- self._dist = unitary_group_gen(seed)
- self.dim = self._dist._process_parameters(dim)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.dim, size, random_state)
- _mvt_doc_default_callparams = """\
- loc : array_like, optional
- Location of the distribution. (default ``0``)
- shape : array_like, optional
- Positive semidefinite matrix of the distribution. (default ``1``)
- df : float, optional
- Degrees of freedom of the distribution; must be greater than zero.
- If ``np.inf`` then results are multivariate normal. The default is ``1``.
- allow_singular : bool, optional
- Whether to allow a singular matrix. (default ``False``)
- """
- _mvt_doc_callparams_note = """\
- Setting the parameter `loc` to ``None`` is equivalent to having `loc`
- be the zero-vector. The parameter `shape` can be a scalar, in which case
- the shape matrix is the identity times that value, a vector of
- diagonal entries for the shape matrix, or a two-dimensional array_like.
- """
- _mvt_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- mvt_docdict_params = {
- '_mvt_doc_default_callparams': _mvt_doc_default_callparams,
- '_mvt_doc_callparams_note': _mvt_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- mvt_docdict_noparams = {
- '_mvt_doc_default_callparams': "",
- '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class multivariate_t_gen(multi_rv_generic):
- r"""A multivariate t-distributed random variable.
- The `loc` parameter specifies the location. The `shape` parameter specifies
- the positive semidefinite shape matrix. The `df` parameter specifies the
- degrees of freedom.
- In addition to calling the methods below, the object itself may be called
- as a function to fix the location, shape matrix, and degrees of freedom
- parameters, returning a "frozen" multivariate t-distribution random.
- Methods
- -------
- pdf(x, loc=None, shape=1, df=1, allow_singular=False)
- Probability density function.
- logpdf(x, loc=None, shape=1, df=1, allow_singular=False)
- Log of the probability density function.
- cdf(x, loc=None, shape=1, df=1, allow_singular=False, *,
- maxpts=None, lower_limit=None, random_state=None)
- Cumulative distribution function.
- rvs(loc=None, shape=1, df=1, size=1, random_state=None)
- Draw random samples from a multivariate t-distribution.
- entropy(loc=None, shape=1, df=1)
- Differential entropy of a multivariate t-distribution.
- marginal(dimensions, loc=None, shape=1, df=1, allow_singular=False)
- Return a marginal multivariate t-distribution.
- Parameters
- ----------
- %(_mvt_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_mvt_doc_callparams_note)s
- The matrix `shape` must be a (symmetric) positive semidefinite matrix. The
- determinant and inverse of `shape` are computed as the pseudo-determinant
- and pseudo-inverse, respectively, so that `shape` does not need to have
- full rank.
- The probability density function for `multivariate_t` is
- .. math::
- f(x) = \frac{\Gamma((\nu + p)/2)}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}}
- \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top}
- \boldsymbol{\Sigma}^{-1}
- (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2},
- where :math:`p` is the dimension of :math:`\mathbf{x}`,
- :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location,
- :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape
- matrix, and :math:`\nu` is the degrees of freedom.
- .. versionadded:: 1.6.0
- References
- ----------
- .. [1] Arellano-Valle et al. "Shannon Entropy and Mutual Information for
- Multivariate Skew-Elliptical Distributions". Scandinavian Journal
- of Statistics. Vol. 40, issue 1.
- Examples
- --------
- The object may be called (as a function) to fix the `loc`, `shape`,
- `df`, and `allow_singular` parameters, returning a "frozen"
- multivariate_t random variable:
- >>> import numpy as np
- >>> from scipy.stats import multivariate_t
- >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2)
- >>> # Frozen object with the same methods but holding the given location,
- >>> # scale, and degrees of freedom fixed.
- Create a contour plot of the PDF.
- >>> import matplotlib.pyplot as plt
- >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01]
- >>> pos = np.dstack((x, y))
- >>> fig, ax = plt.subplots(1, 1)
- >>> ax.set_aspect('equal')
- >>> plt.contourf(x, y, rv.pdf(pos))
- """
- def __init__(self, seed=None):
- """Initialize a multivariate t-distributed random variable.
- Parameters
- ----------
- seed : Random state.
- """
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params)
- self._random_state = check_random_state(seed)
- def __call__(self, loc=None, shape=1, df=1, allow_singular=False,
- seed=None):
- """Create a frozen multivariate t-distribution.
- See `multivariate_t_frozen` for parameters.
- """
- if df == np.inf:
- return multivariate_normal_frozen(mean=loc, cov=shape,
- allow_singular=allow_singular,
- seed=seed)
- return multivariate_t_frozen(loc=loc, shape=shape, df=df,
- allow_singular=allow_singular, seed=seed)
- def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False):
- """Multivariate t-distribution probability density function.
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the probability density function.
- %(_mvt_doc_default_callparams)s
- Returns
- -------
- pdf : Probability density function evaluated at `x`.
- Examples
- --------
- >>> from scipy.stats import multivariate_t
- >>> x = [0.4, 5]
- >>> loc = [0, 1]
- >>> shape = [[1, 0.1], [0.1, 1]]
- >>> df = 7
- >>> multivariate_t.pdf(x, loc, shape, df)
- 0.00075713
- """
- dim, loc, shape, df = self._process_parameters(loc, shape, df)
- x = self._process_quantiles(x, dim)
- shape_info = _PSD(shape, allow_singular=allow_singular)
- logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df,
- dim, shape_info.rank)
- return np.exp(logpdf)
- def logpdf(self, x, loc=None, shape=1, df=1):
- """Log of the multivariate t-distribution probability density function.
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the log of the probability density
- function.
- %(_mvt_doc_default_callparams)s
- Returns
- -------
- logpdf : Log of the probability density function evaluated at `x`.
- Examples
- --------
- >>> from scipy.stats import multivariate_t
- >>> x = [0.4, 5]
- >>> loc = [0, 1]
- >>> shape = [[1, 0.1], [0.1, 1]]
- >>> df = 7
- >>> multivariate_t.logpdf(x, loc, shape, df)
- -7.1859802
- See Also
- --------
- pdf : Probability density function.
- """
- dim, loc, shape, df = self._process_parameters(loc, shape, df)
- x = self._process_quantiles(x, dim)
- shape_info = _PSD(shape)
- cov_object = _covariance.CovViaPSD(shape_info)
- return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim,
- shape_info.rank, cov_object)
- def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank, cov_object=None):
- """Utility method `pdf`, `logpdf` for parameters.
- Parameters
- ----------
- x : ndarray
- Points at which to evaluate the log of the probability density
- function.
- loc : ndarray
- Location of the distribution.
- prec_U : ndarray
- A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse
- of the shape matrix.
- log_pdet : float
- Logarithm of the determinant of the shape matrix.
- df : float
- Degrees of freedom of the distribution.
- dim : int
- Dimension of the quantiles x.
- rank : int
- Rank of the shape matrix.
- Notes
- -----
- As this function does no argument checking, it should not be called
- directly; use 'logpdf' instead.
- """
- if df == np.inf:
- return multivariate_normal._logpdf(x, loc, cov_object)
- dev = x - loc
- maha = np.square(np.dot(dev, prec_U)).sum(axis=-1)
- t = 0.5 * (df + dim)
- A = gammaln(t)
- B = gammaln(0.5 * df)
- C = dim/2. * np.log(df * np.pi)
- D = 0.5 * log_pdet
- E = -t * np.log(1 + (1./df) * maha)
- return _squeeze_output(A - B - C - D + E)
- def _cdf(self, x, loc, shape, df, dim, maxpts=None, lower_limit=None,
- random_state=None):
- # All of this - random state validation, maxpts, apply_along_axis,
- # etc. needs to go in this private method unless we want
- # frozen distribution's `cdf` method to duplicate it or call `cdf`,
- # which would require re-processing parameters
- if random_state is not None:
- rng = check_random_state(random_state)
- else:
- rng = self._random_state
- if not maxpts:
- maxpts = 1000 * dim
- x = self._process_quantiles(x, dim)
- lower_limit = (np.full(loc.shape, -np.inf)
- if lower_limit is None else lower_limit)
- # remove the mean
- x, lower_limit = x - loc, lower_limit - loc
- b, a = np.broadcast_arrays(x, lower_limit)
- i_swap = b < a
- signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
- a, b = a.copy(), b.copy()
- a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
- n = x.shape[-1]
- limits = np.concatenate((a, b), axis=-1)
- def func1d(limits):
- a, b = limits[:n], limits[n:]
- return _qmvt(maxpts, df, shape, a, b, rng)[0]
- res = np.apply_along_axis(func1d, -1, limits) * signs
- # Fixing the output shape for existing distributions is a separate
- # issue. For now, let's keep this consistent with pdf.
- return _squeeze_output(res)
- def cdf(self, x, loc=None, shape=1, df=1, allow_singular=False, *,
- maxpts=None, lower_limit=None, random_state=None):
- """Multivariate t-distribution cumulative distribution function.
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the cumulative distribution function.
- %(_mvt_doc_default_callparams)s
- maxpts : int, optional
- Maximum number of points to use for integration. The default is
- 1000 times the number of dimensions.
- lower_limit : array_like, optional
- Lower limit of integration of the cumulative distribution function.
- Default is negative infinity. Must be broadcastable with `x`.
- %(_doc_random_state)s
- Returns
- -------
- cdf : ndarray or scalar
- Cumulative distribution function evaluated at `x`.
- Examples
- --------
- >>> from scipy.stats import multivariate_t
- >>> x = [0.4, 5]
- >>> loc = [0, 1]
- >>> shape = [[1, 0.1], [0.1, 1]]
- >>> df = 7
- >>> multivariate_t.cdf(x, loc, shape, df)
- 0.64798491
- """
- dim, loc, shape, df = self._process_parameters(loc, shape, df)
- shape = _PSD(shape, allow_singular=allow_singular)._M
- return self._cdf(x, loc, shape, df, dim, maxpts,
- lower_limit, random_state)
- def _entropy(self, dim, df=1, shape=1):
- if df == np.inf:
- return multivariate_normal(None, cov=shape).entropy()
- shape_info = _PSD(shape)
- shape_term = 0.5 * shape_info.log_pdet
- def regular(dim, df):
- halfsum = 0.5 * (dim + df)
- half_df = 0.5 * df
- return (
- -gammaln(halfsum) + gammaln(half_df)
- + 0.5 * dim * np.log(df * np.pi) + halfsum
- * (psi(halfsum) - psi(half_df))
- + shape_term
- )
- def asymptotic(dim, df):
- # Formula from Wolfram Alpha:
- # "asymptotic expansion -gammaln((m+d)/2) + gammaln(d/2) + (m*log(d*pi))/2
- # + ((m+d)/2) * (digamma((m+d)/2) - digamma(d/2))"
- return (
- dim * norm._entropy() + dim / df
- - dim * (dim - 2) * df**-2.0 / 4
- + dim**2 * (dim - 2) * df**-3.0 / 6
- + dim * (-3 * dim**3 + 8 * dim**2 - 8) * df**-4.0 / 24
- + dim**2 * (3 * dim**3 - 10 * dim**2 + 16) * df**-5.0 / 30
- + shape_term
- )[()]
- # preserves ~12 digits accuracy up to at least `dim=1e5`. See gh-18465.
- threshold = dim * 100 * 4 / (np.log(dim) + 1)
- return xpx.apply_where(df >= threshold, (dim, df), asymptotic, regular)
- def entropy(self, loc=None, shape=1, df=1):
- """Calculate the differential entropy of a multivariate
- t-distribution.
- Parameters
- ----------
- %(_mvt_doc_default_callparams)s
- Returns
- -------
- h : float
- Differential entropy
- """
- dim, loc, shape, df = self._process_parameters(None, shape, df)
- return self._entropy(dim, df, shape)
- def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None):
- """Draw random samples from a multivariate t-distribution.
- Parameters
- ----------
- %(_mvt_doc_default_callparams)s
- size : integer, optional
- Number of samples to draw (default 1).
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `P`), where `P` is the
- dimension of the random variable.
- Examples
- --------
- >>> from scipy.stats import multivariate_t
- >>> x = [0.4, 5]
- >>> loc = [0, 1]
- >>> shape = [[1, 0.1], [0.1, 1]]
- >>> df = 7
- >>> multivariate_t.rvs(loc, shape, df)
- array([[0.93477495, 3.00408716]])
- """
- # For implementation details, see equation (3):
- #
- # Hofert, "On Sampling from the Multivariatet Distribution", 2013
- # http://rjournal.github.io/archive/2013-2/hofert.pdf
- #
- dim, loc, shape, df = self._process_parameters(loc, shape, df)
- if random_state is not None:
- rng = check_random_state(random_state)
- else:
- rng = self._random_state
- if np.isinf(df):
- x = np.ones(size)
- else:
- x = rng.chisquare(df, size=size) / df
- z = rng.multivariate_normal(np.zeros(dim), shape, size=size)
- samples = loc + z / np.sqrt(x)[..., None]
- return _squeeze_output(samples)
- def _process_quantiles(self, x, dim):
- """
- Adjust quantiles array so that last axis labels the components of
- each data point.
- """
- x = np.asarray(x, dtype=float)
- if x.ndim == 0:
- x = x[np.newaxis]
- elif x.ndim == 1:
- if dim == 1:
- x = x[:, np.newaxis]
- else:
- x = x[np.newaxis, :]
- return x
- def _process_parameters(self, loc, shape, df):
- """
- Infer dimensionality from location array and shape matrix, handle
- defaults, and ensure compatible dimensions.
- """
- if loc is None and shape is None:
- loc = np.asarray(0, dtype=float)
- shape = np.asarray(1, dtype=float)
- dim = 1
- elif loc is None:
- shape = np.asarray(shape, dtype=float)
- if shape.ndim < 2:
- dim = 1
- else:
- dim = shape.shape[0]
- loc = np.zeros(dim)
- elif shape is None:
- loc = np.asarray(loc, dtype=float)
- dim = loc.size
- shape = np.eye(dim)
- else:
- shape = np.asarray(shape, dtype=float)
- loc = np.asarray(loc, dtype=float)
- dim = loc.size
- if dim == 1:
- loc = loc.reshape(1)
- shape = shape.reshape(1, 1)
- if loc.ndim != 1 or loc.shape[0] != dim:
- raise ValueError(f"Array 'loc' must be a vector of length {dim}.")
- if shape.ndim == 0:
- shape = shape * np.eye(dim)
- elif shape.ndim == 1:
- shape = np.diag(shape)
- elif shape.ndim == 2 and shape.shape != (dim, dim):
- rows, cols = shape.shape
- if rows != cols:
- msg = ("Array 'cov' must be square if it is two dimensional,"
- f" but cov.shape = {str(shape.shape)}.")
- else:
- msg = ("Dimension mismatch: array 'cov' is of shape %s,"
- " but 'loc' is a vector of length %d.")
- msg = msg % (str(shape.shape), len(loc))
- raise ValueError(msg)
- elif shape.ndim > 2:
- raise ValueError(f"Array 'cov' must be at most two-dimensional, "
- f"but cov.ndim = {shape.ndim}")
- # Process degrees of freedom.
- if df is None:
- df = 1
- elif df <= 0:
- raise ValueError("'df' must be greater than zero.")
- elif np.isnan(df):
- raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.")
- return dim, loc, shape, df
- def marginal(self, dimensions, loc=None, shape=1, df=1, allow_singular=False):
- """Return a marginal multivariate t-distribution.
- Parameters
- ----------
- dimensions : int or 1-d array_like
- The dimensions of the multivariate t corresponding
- with the marginal variables, that is, the indices of the dimensions
- that are being retained. The other dimensions are marginalized out.
- %(_mvt_doc_default_callparams)s
- Returns
- -------
- marginal_multivariate_t : multivariate_t_frozen
- An object representing the marginal t-distribution.
- Notes
- -----
- %(_mvt_doc_frozen_callparams_note)s
- """
- params = self._process_parameters(loc, shape, df)
- n, loc, shape, df = params
- dims = _validate_marginal_input(dimensions, n)
- loc = loc[dims]
- shape = shape[np.ix_(dims, dims)]
- return multivariate_t_frozen(loc, shape, df, allow_singular)
- class multivariate_t_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, loc=None, shape=1, df=1, allow_singular=False,
- seed=None):
- """Create a frozen multivariate t distribution.
- Parameters
- ----------
- %(_mvt_doc_default_callparams)s
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import multivariate_t
- >>> loc = np.zeros(3)
- >>> shape = np.eye(3)
- >>> df = 10
- >>> dist = multivariate_t(loc, shape, df)
- >>> dist.rvs()
- array([[ 0.81412036, -1.53612361, 0.42199647]])
- >>> dist.pdf([1, 1, 1])
- array([0.01237803])
- """
- self._dist = multivariate_t_gen(seed)
- dim, loc, shape, df = self._dist._process_parameters(loc, shape, df)
- self.dim, self.loc, self.shape, self.df = dim, loc, shape, df
- self.shape_info = _PSD(shape, allow_singular=allow_singular)
- self.allow_singular = allow_singular
- def logpdf(self, x):
- x = self._dist._process_quantiles(x, self.dim)
- U = self.shape_info.U
- log_pdet = self.shape_info.log_pdet
- return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim,
- self.shape_info.rank)
- def cdf(self, x, *, maxpts=None, lower_limit=None, random_state=None):
- x = self._dist._process_quantiles(x, self.dim)
- return self._dist._cdf(x, self.loc, self.shape, self.df, self.dim,
- maxpts, lower_limit, random_state)
- def pdf(self, x):
- return np.exp(self.logpdf(x))
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(loc=self.loc,
- shape=self.shape,
- df=self.df,
- size=size,
- random_state=random_state)
- def entropy(self):
- return self._dist._entropy(self.dim, self.df, self.shape)
- def marginal(self, dimensions):
- return self._dist.marginal(dimensions, self.loc,
- self.shape, self.df, self.allow_singular)
- multivariate_t = multivariate_t_gen()
- # Set frozen generator docstrings from corresponding docstrings in
- # multivariate_t_gen and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'rvs', 'cdf', 'entropy']:
- method = multivariate_t_gen.__dict__[name]
- method_frozen = multivariate_t_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(method.__doc__,
- mvt_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params)
- _mhg_doc_default_callparams = """\
- m : array_like
- The number of each type of object in the population.
- That is, :math:`m[i]` is the number of objects of
- type :math:`i`.
- n : array_like
- The number of samples taken from the population.
- """
- _mhg_doc_callparams_note = """\
- `m` must be an array of positive integers. If the quantile
- :math:`i` contains values out of the range :math:`[0, m_i]`
- where :math:`m_i` is the number of objects of type :math:`i`
- in the population or if the parameters are inconsistent with one
- another (e.g. ``x.sum() != n``), methods return the appropriate
- value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative
- values, the result will contain ``nan`` there.
- """
- _mhg_doc_frozen_callparams = ""
- _mhg_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- mhg_docdict_params = {
- '_doc_default_callparams': _mhg_doc_default_callparams,
- '_doc_callparams_note': _mhg_doc_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- mhg_docdict_noparams = {
- '_doc_default_callparams': _mhg_doc_frozen_callparams,
- '_doc_callparams_note': _mhg_doc_frozen_callparams_note,
- '_doc_random_state': _doc_random_state
- }
- class multivariate_hypergeom_gen(multi_rv_generic):
- r"""A multivariate hypergeometric random variable.
- Methods
- -------
- pmf(x, m, n)
- Probability mass function.
- logpmf(x, m, n)
- Log of the probability mass function.
- rvs(m, n, size=1, random_state=None)
- Draw random samples from a multivariate hypergeometric
- distribution.
- mean(m, n)
- Mean of the multivariate hypergeometric distribution.
- var(m, n)
- Variance of the multivariate hypergeometric distribution.
- cov(m, n)
- Compute the covariance matrix of the multivariate
- hypergeometric distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- %(_doc_random_state)s
- Notes
- -----
- %(_doc_callparams_note)s
- The probability mass function for `multivariate_hypergeom` is
- .. math::
- P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1}
- \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad
- (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with }
- \sum_{i=1}^k x_i = n
- where :math:`m_i` are the number of objects of type :math:`i`, :math:`M`
- is the total number of objects in the population (sum of all the
- :math:`m_i`), and :math:`n` is the size of the sample to be taken
- from the population.
- .. versionadded:: 1.6.0
- Examples
- --------
- To evaluate the probability mass function of the multivariate
- hypergeometric distribution, with a dichotomous population of size
- :math:`10` and :math:`20`, at a sample of size :math:`12` with
- :math:`8` objects of the first type and :math:`4` objects of the
- second type, use:
- >>> from scipy.stats import multivariate_hypergeom
- >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12)
- 0.0025207176631464523
- The `multivariate_hypergeom` distribution is identical to the
- corresponding `hypergeom` distribution (tiny numerical differences
- notwithstanding) when only two types (good and bad) of objects
- are present in the population as in the example above. Consider
- another example for a comparison with the hypergeometric distribution:
- >>> from scipy.stats import hypergeom
- >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)
- 0.4395604395604395
- >>> hypergeom.pmf(k=3, M=15, n=4, N=10)
- 0.43956043956044005
- The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs``
- support broadcasting, under the convention that the vector parameters
- (``x``, ``m``, and ``n``) are interpreted as if each row along the last
- axis is a single object. For instance, we can combine the previous two
- calls to `multivariate_hypergeom` as
- >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]],
- ... n=[12, 4])
- array([0.00252072, 0.43956044])
- This broadcasting also works for ``cov``, where the output objects are
- square matrices of size ``m.shape[-1]``. For example:
- >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])
- array([[[ 1.05, -1.05],
- [-1.05, 1.05]],
- [[ 1.56, -1.56],
- [-1.56, 1.56]]])
- That is, ``result[0]`` is equal to
- ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal
- to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``.
- Alternatively, the object may be called (as a function) to fix the `m`
- and `n` parameters, returning a "frozen" multivariate hypergeometric
- random variable.
- >>> rv = multivariate_hypergeom(m=[10, 20], n=12)
- >>> rv.pmf(x=[8, 4])
- 0.0025207176631464523
- See Also
- --------
- scipy.stats.hypergeom : The hypergeometric distribution.
- scipy.stats.multinomial : The multinomial distribution.
- References
- ----------
- .. [1] The Multivariate Hypergeometric Distribution,
- http://www.randomservices.org/random/urn/MultiHypergeometric.html
- .. [2] Thomas J. Sargent and John Stachurski, 2020,
- Multivariate Hypergeometric Distribution
- https://python.quantecon.org/multi_hyper.html
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params)
- def __call__(self, m, n, seed=None):
- """Create a frozen multivariate_hypergeom distribution.
- See `multivariate_hypergeom_frozen` for more information.
- """
- return multivariate_hypergeom_frozen(m, n, seed=seed)
- def _process_parameters(self, m, n):
- m = np.asarray(m)
- n = np.asarray(n)
- if m.size == 0:
- m = m.astype(int)
- if n.size == 0:
- n = n.astype(int)
- if not np.issubdtype(m.dtype, np.integer):
- raise TypeError("'m' must an array of integers.")
- if not np.issubdtype(n.dtype, np.integer):
- raise TypeError("'n' must an array of integers.")
- if m.ndim == 0:
- raise ValueError("'m' must be an array with"
- " at least one dimension.")
- # check for empty arrays
- if m.size != 0:
- n = n[..., np.newaxis]
- m, n = np.broadcast_arrays(m, n)
- # check for empty arrays
- if m.size != 0:
- n = n[..., 0]
- mcond = m < 0
- M = m.sum(axis=-1)
- ncond = (n < 0) | (n > M)
- return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond
- def _process_quantiles(self, x, M, m, n):
- x = np.asarray(x)
- if not np.issubdtype(x.dtype, np.integer):
- raise TypeError("'x' must an array of integers.")
- if x.ndim == 0:
- raise ValueError("'x' must be an array with"
- " at least one dimension.")
- if not x.shape[-1] == m.shape[-1]:
- raise ValueError(f"Size of each quantile must be size of 'm': "
- f"received {x.shape[-1]}, "
- f"but expected {m.shape[-1]}.")
- # check for empty arrays
- if m.size != 0:
- n = n[..., np.newaxis]
- M = M[..., np.newaxis]
- x, m, n, M = np.broadcast_arrays(x, m, n, M)
- # check for empty arrays
- if m.size != 0:
- n, M = n[..., 0], M[..., 0]
- xcond = (x < 0) | (x > m)
- return (x, M, m, n, xcond,
- np.any(xcond, axis=-1) | (x.sum(axis=-1) != n))
- def _checkresult(self, result, cond, bad_value):
- result = np.asarray(result)
- if cond.ndim != 0:
- result[cond] = bad_value
- elif cond:
- return bad_value
- if result.ndim == 0:
- return result[()]
- return result
- def _logpmf(self, x, M, m, n, mxcond, ncond):
- # This equation of the pmf comes from the relation,
- # n combine r = beta(n+1, 1) / beta(r+1, n-r+1)
- num = np.zeros_like(m, dtype=np.float64)
- den = np.zeros_like(n, dtype=np.float64)
- m, x = m[~mxcond], x[~mxcond]
- M, n = M[~ncond], n[~ncond]
- num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1))
- den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1))
- num[mxcond] = np.nan
- den[ncond] = np.nan
- num = num.sum(axis=-1)
- return num - den
- def logpmf(self, x, m, n):
- """Log of the multivariate hypergeometric probability mass function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_doc_default_callparams)s
- Returns
- -------
- logpmf : ndarray or scalar
- Log of the probability mass function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- M, m, n, mcond, ncond, mncond = self._process_parameters(m, n)
- (x, M, m, n, xcond,
- xcond_reduced) = self._process_quantiles(x, M, m, n)
- mxcond = mcond | xcond
- ncond = ncond | np.zeros(n.shape, dtype=np.bool_)
- result = self._logpmf(x, M, m, n, mxcond, ncond)
- # replace values for which x was out of the domain; broadcast
- # xcond to the right shape
- xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_)
- result = self._checkresult(result, xcond_, -np.inf)
- # replace values bad for n or m; broadcast
- # mncond to the right shape
- mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_)
- return self._checkresult(result, mncond_, np.nan)
- def pmf(self, x, m, n):
- """Multivariate hypergeometric probability mass function.
- Parameters
- ----------
- x : array_like
- Quantiles, with the last axis of `x` denoting the components.
- %(_doc_default_callparams)s
- Returns
- -------
- pmf : ndarray or scalar
- Probability density function evaluated at `x`
- Notes
- -----
- %(_doc_callparams_note)s
- """
- out = np.exp(self.logpmf(x, m, n))
- return out
- def mean(self, m, n):
- """Mean of the multivariate hypergeometric distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- mean : array_like or scalar
- The mean of the distribution
- """
- M, m, n, _, _, mncond = self._process_parameters(m, n)
- # check for empty arrays
- if m.size != 0:
- M, n = M[..., np.newaxis], n[..., np.newaxis]
- cond = (M == 0)
- M = np.ma.masked_array(M, mask=cond)
- mu = n*(m/M)
- if m.size != 0:
- mncond = (mncond[..., np.newaxis] |
- np.zeros(mu.shape, dtype=np.bool_))
- return self._checkresult(mu, mncond, np.nan)
- def var(self, m, n):
- """Variance of the multivariate hypergeometric distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- array_like
- The variances of the components of the distribution. This is
- the diagonal of the covariance matrix of the distribution
- """
- M, m, n, _, _, mncond = self._process_parameters(m, n)
- # check for empty arrays
- if m.size != 0:
- M, n = M[..., np.newaxis], n[..., np.newaxis]
- cond = (M == 0) & (M-1 == 0)
- M = np.ma.masked_array(M, mask=cond)
- output = n * m/M * (M-m)/M * (M-n)/(M-1)
- if m.size != 0:
- mncond = (mncond[..., np.newaxis] |
- np.zeros(output.shape, dtype=np.bool_))
- return self._checkresult(output, mncond, np.nan)
- def cov(self, m, n):
- """Covariance matrix of the multivariate hypergeometric distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- Returns
- -------
- cov : array_like
- The covariance matrix of the distribution
- """
- # see [1]_ for the formula and [2]_ for implementation
- # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2)
- M, m, n, _, _, mncond = self._process_parameters(m, n)
- # check for empty arrays
- if m.size != 0:
- M = M[..., np.newaxis, np.newaxis]
- n = n[..., np.newaxis, np.newaxis]
- cond = (M == 0) & (M-1 == 0)
- M = np.ma.masked_array(M, mask=cond)
- output = (-n * (M-n)/(M-1) *
- np.einsum("...i,...j->...ij", m, m) / (M**2))
- # check for empty arrays
- if m.size != 0:
- M, n = M[..., 0, 0], n[..., 0, 0]
- cond = cond[..., 0, 0]
- dim = m.shape[-1]
- # diagonal entries need to be computed differently
- for i in range(dim):
- output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i]))
- output[..., i, i] = output[..., i, i] / (M-1)
- output[..., i, i] = output[..., i, i] / (M**2)
- if m.size != 0:
- mncond = (mncond[..., np.newaxis, np.newaxis] |
- np.zeros(output.shape, dtype=np.bool_))
- return self._checkresult(output, mncond, np.nan)
- def rvs(self, m, n, size=None, random_state=None):
- """Draw random samples from a multivariate hypergeometric distribution.
- Parameters
- ----------
- %(_doc_default_callparams)s
- size : integer or iterable of integers, optional
- Number of samples to draw. Default is ``None``, in which case a
- single variate is returned as an array with shape ``m.shape``.
- %(_doc_random_state)s
- Returns
- -------
- rvs : array_like
- Random variates of shape ``size`` or ``m.shape``
- (if ``size=None``).
- Notes
- -----
- %(_doc_callparams_note)s
- Also note that NumPy's `multivariate_hypergeometric` sampler is not
- used as it doesn't support broadcasting.
- """
- M, m, n, _, _, _ = self._process_parameters(m, n)
- random_state = self._get_random_state(random_state)
- if size is not None and isinstance(size, int):
- size = (size, )
- if size is None:
- rvs = np.empty(m.shape, dtype=m.dtype)
- else:
- rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype)
- rem = M
- # This sampler has been taken from numpy gh-13794
- # https://github.com/numpy/numpy/pull/13794
- for c in range(m.shape[-1] - 1):
- rem = rem - m[..., c]
- n0mask = n == 0
- rvs[..., c] = (~n0mask *
- random_state.hypergeometric(m[..., c],
- rem + n0mask,
- n + n0mask,
- size=size))
- n = n - rvs[..., c]
- rvs[..., m.shape[-1] - 1] = n
- return rvs
- multivariate_hypergeom = multivariate_hypergeom_gen()
- class multivariate_hypergeom_frozen(multi_rv_frozen):
- def __init__(self, m, n, seed=None):
- self._dist = multivariate_hypergeom_gen(seed)
- (self.M, self.m, self.n,
- self.mcond, self.ncond,
- self.mncond) = self._dist._process_parameters(m, n)
- # monkey patch self._dist
- def _process_parameters(m, n):
- return (self.M, self.m, self.n,
- self.mcond, self.ncond,
- self.mncond)
- self._dist._process_parameters = _process_parameters
- def logpmf(self, x):
- return self._dist.logpmf(x, self.m, self.n)
- def pmf(self, x):
- return self._dist.pmf(x, self.m, self.n)
- def mean(self):
- return self._dist.mean(self.m, self.n)
- def var(self):
- return self._dist.var(self.m, self.n)
- def cov(self):
- return self._dist.cov(self.m, self.n)
- def rvs(self, size=1, random_state=None):
- return self._dist.rvs(self.m, self.n,
- size=size,
- random_state=random_state)
- # Set frozen generator docstrings from corresponding docstrings in
- # multivariate_hypergeom and fill in default strings in class docstrings
- for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']:
- method = multivariate_hypergeom_gen.__dict__[name]
- method_frozen = multivariate_hypergeom_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, mhg_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__,
- mhg_docdict_params)
- class random_table_gen(multi_rv_generic):
- r"""Contingency tables from independent samples with fixed marginal sums.
- This is the distribution of random tables with given row and column vector
- sums. This distribution represents the set of random tables under the null
- hypothesis that rows and columns are independent. It is used in hypothesis
- tests of independence.
- Because of assumed independence, the expected frequency of each table
- element can be computed from the row and column sums, so that the
- distribution is completely determined by these two vectors.
- Methods
- -------
- logpmf(x)
- Log-probability of table `x` to occur in the distribution.
- pmf(x)
- Probability of table `x` to occur in the distribution.
- mean(row, col)
- Mean table.
- rvs(row, col, size=None, method=None, random_state=None)
- Draw random tables with given row and column vector sums.
- Parameters
- ----------
- %(_doc_row_col)s
- %(_doc_random_state)s
- Notes
- -----
- %(_doc_row_col_note)s
- Random elements from the distribution are generated either with Boyett's
- [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has
- O(N) time and space complexity, where N is the total sum of entries in the
- table. Patefield's algorithm has O(K x log(N)) time complexity, where K is
- the number of cells in the table and requires only a small constant work
- space. By default, the `rvs` method selects the fastest algorithm based on
- the input, but you can specify the algorithm with the keyword `method`.
- Allowed values are "boyett" and "patefield".
- .. versionadded:: 1.10.0
- Examples
- --------
- >>> from scipy.stats import random_table
- >>> row = [1, 5]
- >>> col = [2, 3, 1]
- >>> random_table.mean(row, col)
- array([[0.33333333, 0.5 , 0.16666667],
- [1.66666667, 2.5 , 0.83333333]])
- Alternatively, the object may be called (as a function) to fix the row
- and column vector sums, returning a "frozen" distribution.
- >>> dist = random_table(row, col)
- >>> dist.rvs(random_state=123)
- array([[1, 0, 0],
- [1, 3, 1]])
- References
- ----------
- .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332
- .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- def __call__(self, row, col, *, seed=None):
- """Create a frozen distribution of tables with given marginals.
- See `random_table_frozen` for more information.
- """
- return random_table_frozen(row, col, seed=seed)
- def logpmf(self, x, row, col):
- """Log-probability of table to occur in the distribution.
- Parameters
- ----------
- %(_doc_x)s
- %(_doc_row_col)s
- Returns
- -------
- logpmf : ndarray or scalar
- Log of the probability mass function evaluated at `x`.
- Notes
- -----
- %(_doc_row_col_note)s
- If row and column marginals of `x` do not match `row` and `col`,
- negative infinity is returned.
- Examples
- --------
- >>> from scipy.stats import random_table
- >>> import numpy as np
- >>> x = [[1, 5, 1], [2, 3, 1]]
- >>> row = np.sum(x, axis=1)
- >>> col = np.sum(x, axis=0)
- >>> random_table.logpmf(x, row, col)
- -1.6306401200847027
- Alternatively, the object may be called (as a function) to fix the row
- and column vector sums, returning a "frozen" distribution.
- >>> d = random_table(row, col)
- >>> d.logpmf(x)
- -1.6306401200847027
- """
- r, c, n = self._process_parameters(row, col)
- x = np.asarray(x)
- if x.ndim < 2:
- raise ValueError("`x` must be at least two-dimensional")
- dtype_is_int = np.issubdtype(x.dtype, np.integer)
- with np.errstate(invalid='ignore'):
- if not dtype_is_int and not np.all(x.astype(int) == x):
- raise ValueError("`x` must contain only integral values")
- # x does not contain NaN if we arrive here
- if np.any(x < 0):
- raise ValueError("`x` must contain only non-negative values")
- r2 = np.sum(x, axis=-1)
- c2 = np.sum(x, axis=-2)
- if r2.shape[-1] != len(r):
- raise ValueError("shape of `x` must agree with `row`")
- if c2.shape[-1] != len(c):
- raise ValueError("shape of `x` must agree with `col`")
- res = np.empty(x.shape[:-2])
- mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1)
- def lnfac(x):
- return gammaln(x + 1)
- res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1)
- - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2)))
- res[~mask] = -np.inf
- return res[()]
- def pmf(self, x, row, col):
- """Probability of table to occur in the distribution.
- Parameters
- ----------
- %(_doc_x)s
- %(_doc_row_col)s
- Returns
- -------
- pmf : ndarray or scalar
- Probability mass function evaluated at `x`.
- Notes
- -----
- %(_doc_row_col_note)s
- If row and column marginals of `x` do not match `row` and `col`,
- zero is returned.
- Examples
- --------
- >>> from scipy.stats import random_table
- >>> import numpy as np
- >>> x = [[1, 5, 1], [2, 3, 1]]
- >>> row = np.sum(x, axis=1)
- >>> col = np.sum(x, axis=0)
- >>> random_table.pmf(x, row, col)
- 0.19580419580419592
- Alternatively, the object may be called (as a function) to fix the row
- and column vector sums, returning a "frozen" distribution.
- >>> d = random_table(row, col)
- >>> d.pmf(x)
- 0.19580419580419592
- """
- return np.exp(self.logpmf(x, row, col))
- def mean(self, row, col):
- """Mean of distribution of conditional tables.
- %(_doc_mean_params)s
- Returns
- -------
- mean: ndarray
- Mean of the distribution.
- Notes
- -----
- %(_doc_row_col_note)s
- Examples
- --------
- >>> from scipy.stats import random_table
- >>> row = [1, 5]
- >>> col = [2, 3, 1]
- >>> random_table.mean(row, col)
- array([[0.33333333, 0.5 , 0.16666667],
- [1.66666667, 2.5 , 0.83333333]])
- Alternatively, the object may be called (as a function) to fix the row
- and column vector sums, returning a "frozen" distribution.
- >>> d = random_table(row, col)
- >>> d.mean()
- array([[0.33333333, 0.5 , 0.16666667],
- [1.66666667, 2.5 , 0.83333333]])
- """
- r, c, n = self._process_parameters(row, col)
- return np.outer(r, c) / n
- def rvs(self, row, col, *, size=None, method=None, random_state=None):
- """Draw random tables with fixed column and row marginals.
- Parameters
- ----------
- %(_doc_row_col)s
- size : integer, optional
- Number of samples to draw (default 1).
- method : str, optional
- Which method to use, "boyett" or "patefield". If None (default),
- selects the fastest method for this input.
- %(_doc_random_state)s
- Returns
- -------
- rvs : ndarray
- Random 2D tables of shape (`size`, `len(row)`, `len(col)`).
- Notes
- -----
- %(_doc_row_col_note)s
- Examples
- --------
- >>> from scipy.stats import random_table
- >>> row = [1, 5]
- >>> col = [2, 3, 1]
- >>> random_table.rvs(row, col, random_state=123)
- array([[1., 0., 0.],
- [1., 3., 1.]])
- Alternatively, the object may be called (as a function) to fix the row
- and column vector sums, returning a "frozen" distribution.
- >>> d = random_table(row, col)
- >>> d.rvs(random_state=123)
- array([[1., 0., 0.],
- [1., 3., 1.]])
- """
- r, c, n = self._process_parameters(row, col)
- size, shape = self._process_size_shape(size, r, c)
- random_state = self._get_random_state(random_state)
- meth = self._process_rvs_method(method, r, c, n)
- return meth(r, c, n, size, random_state).reshape(shape)
- @staticmethod
- def _process_parameters(row, col):
- """
- Check that row and column vectors are one-dimensional, that they do
- not contain negative or non-integer entries, and that the sums over
- both vectors are equal.
- """
- r = np.array(row, dtype=np.int64, copy=True)
- c = np.array(col, dtype=np.int64, copy=True)
- if np.ndim(r) != 1:
- raise ValueError("`row` must be one-dimensional")
- if np.ndim(c) != 1:
- raise ValueError("`col` must be one-dimensional")
- if np.any(r < 0):
- raise ValueError("each element of `row` must be non-negative")
- if np.any(c < 0):
- raise ValueError("each element of `col` must be non-negative")
- n = np.sum(r)
- if n != np.sum(c):
- raise ValueError("sums over `row` and `col` must be equal")
- if not np.all(r == np.asarray(row)):
- raise ValueError("each element of `row` must be an integer")
- if not np.all(c == np.asarray(col)):
- raise ValueError("each element of `col` must be an integer")
- return r, c, n
- @staticmethod
- def _process_size_shape(size, r, c):
- """
- Compute the number of samples to be drawn and the shape of the output
- """
- shape = (len(r), len(c))
- if size is None:
- return 1, shape
- size = np.atleast_1d(size)
- if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0):
- raise ValueError("`size` must be a non-negative integer or `None`")
- return np.prod(size), tuple(size) + shape
- @classmethod
- def _process_rvs_method(cls, method, r, c, n):
- known_methods = {
- None: cls._rvs_select(r, c, n),
- "boyett": cls._rvs_boyett,
- "patefield": cls._rvs_patefield,
- }
- try:
- return known_methods[method]
- except KeyError:
- raise ValueError(f"'{method}' not recognized, "
- f"must be one of {set(known_methods)}")
- @classmethod
- def _rvs_select(cls, r, c, n):
- fac = 1.0 # benchmarks show that this value is about 1
- k = len(r) * len(c) # number of cells
- # n + 1 guards against failure if n == 0
- if n > fac * np.log(n + 1) * k:
- return cls._rvs_patefield
- return cls._rvs_boyett
- @staticmethod
- def _rvs_boyett(row, col, ntot, size, random_state):
- return _rcont.rvs_rcont1(row, col, ntot, size, random_state)
- @staticmethod
- def _rvs_patefield(row, col, ntot, size, random_state):
- return _rcont.rvs_rcont2(row, col, ntot, size, random_state)
- random_table = random_table_gen()
- class random_table_frozen(multi_rv_frozen):
- __class_getitem__ = None
- def __init__(self, row, col, *, seed=None):
- self._dist = random_table_gen(seed)
- self._params = self._dist._process_parameters(row, col)
- # monkey patch self._dist
- def _process_parameters(r, c):
- return self._params
- self._dist._process_parameters = _process_parameters
- def logpmf(self, x):
- return self._dist.logpmf(x, None, None)
- def pmf(self, x):
- return self._dist.pmf(x, None, None)
- def mean(self):
- return self._dist.mean(None, None)
- def rvs(self, size=None, method=None, random_state=None):
- # optimisations are possible here
- return self._dist.rvs(None, None, size=size, method=method,
- random_state=random_state)
- _ctab_doc_row_col = """\
- row : array_like
- Sum of table entries in each row.
- col : array_like
- Sum of table entries in each column."""
- _ctab_doc_x = """\
- x : array-like
- Two-dimensional table of non-negative integers, or a
- multi-dimensional array with the last two dimensions
- corresponding with the tables."""
- _ctab_doc_row_col_note = """\
- The row and column vectors must be one-dimensional, not empty,
- and each sum up to the same value. They cannot contain negative
- or noninteger entries."""
- _ctab_doc_mean_params = f"""
- Parameters
- ----------
- {_ctab_doc_row_col}"""
- _ctab_doc_row_col_note_frozen = """\
- See class definition for a detailed description of parameters."""
- _ctab_docdict = {
- "_doc_random_state": _doc_random_state,
- "_doc_row_col": _ctab_doc_row_col,
- "_doc_x": _ctab_doc_x,
- "_doc_mean_params": _ctab_doc_mean_params,
- "_doc_row_col_note": _ctab_doc_row_col_note,
- }
- _ctab_docdict_frozen = _ctab_docdict.copy()
- _ctab_docdict_frozen.update({
- "_doc_row_col": "",
- "_doc_mean_params": "",
- "_doc_row_col_note": _ctab_doc_row_col_note_frozen,
- })
- def _docfill(obj, docdict, template=None):
- obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict)
- # Set frozen generator docstrings from corresponding docstrings in
- # random_table and fill in default strings in class docstrings
- _docfill(random_table_gen, _ctab_docdict)
- for name in ['logpmf', 'pmf', 'mean', 'rvs']:
- method = random_table_gen.__dict__[name]
- method_frozen = random_table_frozen.__dict__[name]
- _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__)
- _docfill(method, _ctab_docdict)
- class uniform_direction_gen(multi_rv_generic):
- r"""A vector-valued uniform direction.
- Return a random direction (unit vector). The `dim` keyword specifies
- the dimensionality of the space.
- Methods
- -------
- rvs(dim=None, size=1, random_state=None)
- Draw random directions.
- Parameters
- ----------
- dim : scalar
- Dimension of directions.
- seed : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Notes
- -----
- This distribution generates unit vectors uniformly distributed on
- the surface of a hypersphere. These can be interpreted as random
- directions.
- For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2`
- will be sampled.
- References
- ----------
- .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a
- Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.stats import uniform_direction
- >>> x = uniform_direction.rvs(3)
- >>> np.linalg.norm(x)
- 1.
- This generates one random direction, a vector on the surface of
- :math:`S^2`.
- Alternatively, the object may be called (as a function) to return a frozen
- distribution with fixed `dim` parameter. Here,
- we create a `uniform_direction` with ``dim=3`` and draw 5 observations.
- The samples are then arranged in an array of shape 5x3.
- >>> rng = np.random.default_rng()
- >>> uniform_sphere_dist = uniform_direction(3)
- >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng)
- >>> unit_vectors
- array([[ 0.56688642, -0.1332634 , -0.81294566],
- [-0.427126 , -0.74779278, 0.50830044],
- [ 0.3793989 , 0.92346629, 0.05715323],
- [ 0.36428383, -0.92449076, -0.11231259],
- [-0.27733285, 0.94410968, -0.17816678]])
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__)
- def __call__(self, dim=None, seed=None):
- """Create a frozen n-dimensional uniform direction distribution.
- See `uniform_direction` for more information.
- """
- return uniform_direction_frozen(dim, seed=seed)
- def _process_parameters(self, dim):
- """Dimension N must be specified; it cannot be inferred."""
- if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim):
- raise ValueError("Dimension of vector must be specified, "
- "and must be an integer greater than 0.")
- return int(dim)
- def rvs(self, dim, size=None, random_state=None):
- """Draw random samples from S(N-1).
- Parameters
- ----------
- dim : integer
- Dimension of space (N).
- size : int or tuple of ints, optional
- Given a shape of, for example, (m,n,k), m*n*k samples are
- generated, and packed in an m-by-n-by-k arrangement.
- Because each sample is N-dimensional, the output shape
- is (m,n,k,N). If no shape is specified, a single (N-D)
- sample is returned.
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- Pseudorandom number generator state used to generate resamples.
- If `random_state` is ``None`` (or `np.random`), the
- `numpy.random.RandomState` singleton is used.
- If `random_state` is an int, a new ``RandomState`` instance is
- used, seeded with `random_state`.
- If `random_state` is already a ``Generator`` or ``RandomState``
- instance then that instance is used.
- Returns
- -------
- rvs : ndarray
- Random direction vectors
- """
- random_state = self._get_random_state(random_state)
- if size is None:
- size = np.array([], dtype=int)
- size = np.atleast_1d(size)
- dim = self._process_parameters(dim)
- samples = _sample_uniform_direction(dim, size, random_state)
- return samples
- uniform_direction = uniform_direction_gen()
- class uniform_direction_frozen(multi_rv_frozen):
- def __init__(self, dim=None, seed=None):
- """Create a frozen n-dimensional uniform direction distribution.
- Parameters
- ----------
- dim : int
- Dimension of matrices
- seed : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Examples
- --------
- >>> from scipy.stats import uniform_direction
- >>> x = uniform_direction(3)
- >>> x.rvs()
- """
- self._dist = uniform_direction_gen(seed)
- self.dim = self._dist._process_parameters(dim)
- def rvs(self, size=None, random_state=None):
- return self._dist.rvs(self.dim, size, random_state)
- def _sample_uniform_direction(dim, size, random_state):
- """
- Private method to generate uniform directions
- Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a
- Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
- """
- samples_shape = np.append(size, dim)
- samples = random_state.standard_normal(samples_shape)
- samples /= np.linalg.norm(samples, axis=-1, keepdims=True)
- return samples
- _dirichlet_mn_doc_default_callparams = """\
- alpha : array_like
- The concentration parameters. The number of entries along the last axis
- determines the dimensionality of the distribution. Each entry must be
- strictly positive.
- n : int or array_like
- The number of trials. Each element must be a non-negative integer.
- """
- _dirichlet_mn_doc_frozen_callparams = ""
- _dirichlet_mn_doc_frozen_callparams_note = """\
- See class definition for a detailed description of parameters."""
- dirichlet_mn_docdict_params = {
- '_dirichlet_mn_doc_default_callparams': _dirichlet_mn_doc_default_callparams,
- '_doc_random_state': _doc_random_state
- }
- dirichlet_mn_docdict_noparams = {
- '_dirichlet_mn_doc_default_callparams': _dirichlet_mn_doc_frozen_callparams,
- '_doc_random_state': _doc_random_state
- }
- def _dirichlet_multinomial_check_parameters(alpha, n, x=None):
- alpha = np.asarray(alpha)
- n = np.asarray(n)
- if x is not None:
- # Ensure that `x` and `alpha` are arrays. If the shapes are
- # incompatible, NumPy will raise an appropriate error.
- try:
- x, alpha = np.broadcast_arrays(x, alpha)
- except ValueError as e:
- msg = "`x` and `alpha` must be broadcastable."
- raise ValueError(msg) from e
- x_int = np.floor(x)
- if np.any(x < 0) or np.any(x != x_int):
- raise ValueError("`x` must contain only non-negative integers.")
- x = x_int
- if np.any(alpha <= 0):
- raise ValueError("`alpha` must contain only positive values.")
- n_int = np.floor(n)
- if np.any(n < 0) or np.any(n != n_int):
- raise ValueError("`n` must be a non-negative integer.")
- n = n_int
- sum_alpha = np.sum(alpha, axis=-1)
- sum_alpha, n = np.broadcast_arrays(sum_alpha, n)
- return (alpha, sum_alpha, n) if x is None else (alpha, sum_alpha, n, x)
- class dirichlet_multinomial_gen(multi_rv_generic):
- r"""A Dirichlet multinomial random variable.
- The Dirichlet multinomial distribution is a compound probability
- distribution: it is the multinomial distribution with number of trials
- `n` and class probabilities ``p`` randomly sampled from a Dirichlet
- distribution with concentration parameters ``alpha``.
- Methods
- -------
- logpmf(x, alpha, n):
- Log of the probability mass function.
- pmf(x, alpha, n):
- Probability mass function.
- mean(alpha, n):
- Mean of the Dirichlet multinomial distribution.
- var(alpha, n):
- Variance of the Dirichlet multinomial distribution.
- cov(alpha, n):
- The covariance of the Dirichlet multinomial distribution.
- Parameters
- ----------
- %(_dirichlet_mn_doc_default_callparams)s
- %(_doc_random_state)s
- See Also
- --------
- scipy.stats.dirichlet : The dirichlet distribution.
- scipy.stats.multinomial : The multinomial distribution.
- References
- ----------
- .. [1] Dirichlet-multinomial distribution, Wikipedia,
- https://www.wikipedia.org/wiki/Dirichlet-multinomial_distribution
- Examples
- --------
- >>> from scipy.stats import dirichlet_multinomial
- Get the PMF
- >>> n = 6 # number of trials
- >>> alpha = [3, 4, 5] # concentration parameters
- >>> x = [1, 2, 3] # counts
- >>> dirichlet_multinomial.pmf(x, alpha, n)
- 0.08484162895927604
- If the sum of category counts does not equal the number of trials,
- the probability mass is zero.
- >>> dirichlet_multinomial.pmf(x, alpha, n=7)
- 0.0
- Get the log of the PMF
- >>> dirichlet_multinomial.logpmf(x, alpha, n)
- -2.4669689491013327
- Get the mean
- >>> dirichlet_multinomial.mean(alpha, n)
- array([1.5, 2. , 2.5])
- Get the variance
- >>> dirichlet_multinomial.var(alpha, n)
- array([1.55769231, 1.84615385, 2.01923077])
- Get the covariance
- >>> dirichlet_multinomial.cov(alpha, n)
- array([[ 1.55769231, -0.69230769, -0.86538462],
- [-0.69230769, 1.84615385, -1.15384615],
- [-0.86538462, -1.15384615, 2.01923077]])
- Alternatively, the object may be called (as a function) to fix the
- `alpha` and `n` parameters, returning a "frozen" Dirichlet multinomial
- random variable.
- >>> dm = dirichlet_multinomial(alpha, n)
- >>> dm.pmf(x)
- 0.08484162895927579
- All methods are fully vectorized. Each element of `x` and `alpha` is
- a vector (along the last axis), each element of `n` is an
- integer (scalar), and the result is computed element-wise.
- >>> x = [[1, 2, 3], [4, 5, 6]]
- >>> alpha = [[1, 2, 3], [4, 5, 6]]
- >>> n = [6, 15]
- >>> dirichlet_multinomial.pmf(x, alpha, n)
- array([0.06493506, 0.02626937])
- >>> dirichlet_multinomial.cov(alpha, n).shape # both covariance matrices
- (2, 3, 3)
- Broadcasting according to standard NumPy conventions is supported. Here,
- we have four sets of concentration parameters (each a two element vector)
- for each of three numbers of trials (each a scalar).
- >>> alpha = [[3, 4], [4, 5], [5, 6], [6, 7]]
- >>> n = [[6], [7], [8]]
- >>> dirichlet_multinomial.mean(alpha, n).shape
- (3, 4, 2)
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- self.__doc__ = doccer.docformat(self.__doc__,
- dirichlet_mn_docdict_params)
- def __call__(self, alpha, n, seed=None):
- return dirichlet_multinomial_frozen(alpha, n, seed=seed)
- def logpmf(self, x, alpha, n):
- """The log of the probability mass function.
- Parameters
- ----------
- x: ndarray
- Category counts (non-negative integers). Must be broadcastable
- with shape parameter ``alpha``. If multidimensional, the last axis
- must correspond with the categories.
- %(_dirichlet_mn_doc_default_callparams)s
- Returns
- -------
- out: ndarray or scalar
- Log of the probability mass function.
- """
- a, Sa, n, x = _dirichlet_multinomial_check_parameters(alpha, n, x)
- out = np.asarray(loggamma(Sa) + loggamma(n + 1) - loggamma(n + Sa))
- out += (loggamma(x + a) - (loggamma(a) + loggamma(x + 1))).sum(axis=-1)
- np.place(out, n != x.sum(axis=-1), -np.inf)
- return out[()]
- def pmf(self, x, alpha, n):
- """Probability mass function for a Dirichlet multinomial distribution.
- Parameters
- ----------
- x: ndarray
- Category counts (non-negative integers). Must be broadcastable
- with shape parameter ``alpha``. If multidimensional, the last axis
- must correspond with the categories.
- %(_dirichlet_mn_doc_default_callparams)s
- Returns
- -------
- out: ndarray or scalar
- Probability mass function.
- """
- return np.exp(self.logpmf(x, alpha, n))
- def mean(self, alpha, n):
- """Mean of a Dirichlet multinomial distribution.
- Parameters
- ----------
- %(_dirichlet_mn_doc_default_callparams)s
- Returns
- -------
- out: ndarray
- Mean of a Dirichlet multinomial distribution.
- """
- a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
- n, Sa = n[..., np.newaxis], Sa[..., np.newaxis]
- return n * a / Sa
- def var(self, alpha, n):
- """The variance of the Dirichlet multinomial distribution.
- Parameters
- ----------
- %(_dirichlet_mn_doc_default_callparams)s
- Returns
- -------
- out: array_like
- The variances of the components of the distribution. This is
- the diagonal of the covariance matrix of the distribution.
- """
- a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
- n, Sa = n[..., np.newaxis], Sa[..., np.newaxis]
- return n * a / Sa * (1 - a/Sa) * (n + Sa) / (1 + Sa)
- def cov(self, alpha, n):
- """Covariance matrix of a Dirichlet multinomial distribution.
- Parameters
- ----------
- %(_dirichlet_mn_doc_default_callparams)s
- Returns
- -------
- out : array_like
- The covariance matrix of the distribution.
- """
- a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
- var = dirichlet_multinomial.var(a, n)
- n, Sa = n[..., np.newaxis, np.newaxis], Sa[..., np.newaxis, np.newaxis]
- aiaj = a[..., :, np.newaxis] * a[..., np.newaxis, :]
- cov = -n * aiaj / Sa ** 2 * (n + Sa) / (1 + Sa)
- ii = np.arange(cov.shape[-1])
- cov[..., ii, ii] = var
- return cov
- dirichlet_multinomial = dirichlet_multinomial_gen()
- class dirichlet_multinomial_frozen(multi_rv_frozen):
- def __init__(self, alpha, n, seed=None):
- alpha, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
- self.alpha = alpha
- self.n = n
- self._dist = dirichlet_multinomial_gen(seed)
- def logpmf(self, x):
- return self._dist.logpmf(x, self.alpha, self.n)
- def pmf(self, x):
- return self._dist.pmf(x, self.alpha, self.n)
- def mean(self):
- return self._dist.mean(self.alpha, self.n)
- def var(self):
- return self._dist.var(self.alpha, self.n)
- def cov(self):
- return self._dist.cov(self.alpha, self.n)
- # Set frozen generator docstrings from corresponding docstrings in
- # dirichlet_multinomial and fill in default strings in class docstrings.
- for name in ['logpmf', 'pmf', 'mean', 'var', 'cov']:
- method = dirichlet_multinomial_gen.__dict__[name]
- method_frozen = dirichlet_multinomial_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(
- method.__doc__, dirichlet_mn_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__,
- dirichlet_mn_docdict_params)
- class vonmises_fisher_gen(multi_rv_generic):
- r"""A von Mises-Fisher variable.
- The `mu` keyword specifies the mean direction vector. The `kappa` keyword
- specifies the concentration parameter.
- Methods
- -------
- pdf(x, mu=None, kappa=1)
- Probability density function.
- logpdf(x, mu=None, kappa=1)
- Log of the probability density function.
- rvs(mu=None, kappa=1, size=1, random_state=None)
- Draw random samples from a von Mises-Fisher distribution.
- entropy(mu=None, kappa=1)
- Compute the differential entropy of the von Mises-Fisher distribution.
- fit(data)
- Fit a von Mises-Fisher distribution to data.
- Parameters
- ----------
- mu : array_like
- Mean direction of the distribution. Must be a one-dimensional unit
- vector of norm 1.
- kappa : float
- Concentration parameter. Must be positive.
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- See Also
- --------
- scipy.stats.vonmises : Von-Mises Fisher distribution in 2D on a circle
- uniform_direction : uniform distribution on the surface of a hypersphere
- Notes
- -----
- The von Mises-Fisher distribution is a directional distribution on the
- surface of the unit hypersphere. The probability density
- function of a unit vector :math:`\mathbf{x}` is
- .. math::
- f(\mathbf{x}) = \frac{\kappa^{d/2-1}}{(2\pi)^{d/2}I_{d/2-1}(\kappa)}
- \exp\left(\kappa \mathbf{\mu}^T\mathbf{x}\right),
- where :math:`\mathbf{\mu}` is the mean direction, :math:`\kappa` the
- concentration parameter, :math:`d` the dimension and :math:`I` the
- modified Bessel function of the first kind. As :math:`\mu` represents
- a direction, it must be a unit vector or in other words, a point
- on the hypersphere: :math:`\mathbf{\mu}\in S^{d-1}`. :math:`\kappa` is a
- concentration parameter, which means that it must be positive
- (:math:`\kappa>0`) and that the distribution becomes more narrow with
- increasing :math:`\kappa`. In that sense, the reciprocal value
- :math:`1/\kappa` resembles the variance parameter of the normal
- distribution.
- The von Mises-Fisher distribution often serves as an analogue of the
- normal distribution on the sphere. Intuitively, for unit vectors, a
- useful distance measure is given by the angle :math:`\alpha` between
- them. This is exactly what the scalar product
- :math:`\mathbf{\mu}^T\mathbf{x}=\cos(\alpha)` in the
- von Mises-Fisher probability density function describes: the angle
- between the mean direction :math:`\mathbf{\mu}` and the vector
- :math:`\mathbf{x}`. The larger the angle between them, the smaller the
- probability to observe :math:`\mathbf{x}` for this particular mean
- direction :math:`\mathbf{\mu}`.
- In dimensions 2 and 3, specialized algorithms are used for fast sampling
- [2]_, [3]_. For dimensions of 4 or higher the rejection sampling algorithm
- described in [4]_ is utilized. This implementation is partially based on
- the geomstats package [5]_, [6]_.
- .. versionadded:: 1.11
- References
- ----------
- .. [1] Von Mises-Fisher distribution, Wikipedia,
- https://en.wikipedia.org/wiki/Von_Mises%E2%80%93Fisher_distribution
- .. [2] Mardia, K., and Jupp, P. Directional statistics. Wiley, 2000.
- .. [3] J. Wenzel. Numerically stable sampling of the von Mises Fisher
- distribution on S2.
- https://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf
- .. [4] Wood, A. Simulation of the von mises fisher distribution.
- Communications in statistics-simulation and computation 23,
- 1 (1994), 157-164. https://doi.org/10.1080/03610919408813161
- .. [5] geomstats, Github. MIT License. Accessed: 06.01.2023.
- https://github.com/geomstats/geomstats
- .. [6] Miolane, N. et al. Geomstats: A Python Package for Riemannian
- Geometry in Machine Learning. Journal of Machine Learning Research
- 21 (2020). http://jmlr.org/papers/v21/19-027.html
- Examples
- --------
- **Visualization of the probability density**
- Plot the probability density in three dimensions for increasing
- concentration parameter. The density is calculated by the ``pdf``
- method.
- >>> import numpy as np
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import vonmises_fisher
- >>> from matplotlib.colors import Normalize
- >>> n_grid = 100
- >>> u = np.linspace(0, np.pi, n_grid)
- >>> v = np.linspace(0, 2 * np.pi, n_grid)
- >>> u_grid, v_grid = np.meshgrid(u, v)
- >>> vertices = np.stack([np.cos(v_grid) * np.sin(u_grid),
- ... np.sin(v_grid) * np.sin(u_grid),
- ... np.cos(u_grid)],
- ... axis=2)
- >>> x = np.outer(np.cos(v), np.sin(u))
- >>> y = np.outer(np.sin(v), np.sin(u))
- >>> z = np.outer(np.ones_like(u), np.cos(u))
- >>> def plot_vmf_density(ax, x, y, z, vertices, mu, kappa):
- ... vmf = vonmises_fisher(mu, kappa)
- ... pdf_values = vmf.pdf(vertices)
- ... pdfnorm = Normalize(vmin=pdf_values.min(), vmax=pdf_values.max())
- ... ax.plot_surface(x, y, z, rstride=1, cstride=1,
- ... facecolors=plt.cm.viridis(pdfnorm(pdf_values)),
- ... linewidth=0)
- ... ax.set_aspect('equal')
- ... ax.view_init(azim=-130, elev=0)
- ... ax.axis('off')
- ... ax.set_title(rf"$\kappa={kappa}$")
- >>> fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(9, 4),
- ... subplot_kw={"projection": "3d"})
- >>> left, middle, right = axes
- >>> mu = np.array([-np.sqrt(0.5), -np.sqrt(0.5), 0])
- >>> plot_vmf_density(left, x, y, z, vertices, mu, 5)
- >>> plot_vmf_density(middle, x, y, z, vertices, mu, 20)
- >>> plot_vmf_density(right, x, y, z, vertices, mu, 100)
- >>> plt.subplots_adjust(top=1, bottom=0.0, left=0.0, right=1.0, wspace=0.)
- >>> plt.show()
- As we increase the concentration parameter, the points are getting more
- clustered together around the mean direction.
- **Sampling**
- Draw 5 samples from the distribution using the ``rvs`` method resulting
- in a 5x3 array.
- >>> rng = np.random.default_rng()
- >>> mu = np.array([0, 0, 1])
- >>> samples = vonmises_fisher(mu, 20).rvs(5, random_state=rng)
- >>> samples
- array([[ 0.3884594 , -0.32482588, 0.86231516],
- [ 0.00611366, -0.09878289, 0.99509023],
- [-0.04154772, -0.01637135, 0.99900239],
- [-0.14613735, 0.12553507, 0.98126695],
- [-0.04429884, -0.23474054, 0.97104814]])
- These samples are unit vectors on the sphere :math:`S^2`. To verify,
- let us calculate their euclidean norms:
- >>> np.linalg.norm(samples, axis=1)
- array([1., 1., 1., 1., 1.])
- Plot 20 observations drawn from the von Mises-Fisher distribution for
- increasing concentration parameter :math:`\kappa`. The red dot highlights
- the mean direction :math:`\mu`.
- >>> def plot_vmf_samples(ax, x, y, z, mu, kappa):
- ... vmf = vonmises_fisher(mu, kappa)
- ... samples = vmf.rvs(20)
- ... ax.plot_surface(x, y, z, rstride=1, cstride=1, linewidth=0,
- ... alpha=0.2)
- ... ax.scatter(samples[:, 0], samples[:, 1], samples[:, 2], c='k', s=5)
- ... ax.scatter(mu[0], mu[1], mu[2], c='r', s=30)
- ... ax.set_aspect('equal')
- ... ax.view_init(azim=-130, elev=0)
- ... ax.axis('off')
- ... ax.set_title(rf"$\kappa={kappa}$")
- >>> mu = np.array([-np.sqrt(0.5), -np.sqrt(0.5), 0])
- >>> fig, axes = plt.subplots(nrows=1, ncols=3,
- ... subplot_kw={"projection": "3d"},
- ... figsize=(9, 4))
- >>> left, middle, right = axes
- >>> plot_vmf_samples(left, x, y, z, mu, 5)
- >>> plot_vmf_samples(middle, x, y, z, mu, 20)
- >>> plot_vmf_samples(right, x, y, z, mu, 100)
- >>> plt.subplots_adjust(top=1, bottom=0.0, left=0.0,
- ... right=1.0, wspace=0.)
- >>> plt.show()
- The plots show that with increasing concentration :math:`\kappa` the
- resulting samples are centered more closely around the mean direction.
- **Fitting the distribution parameters**
- The distribution can be fitted to data using the ``fit`` method returning
- the estimated parameters. As a toy example let's fit the distribution to
- samples drawn from a known von Mises-Fisher distribution.
- >>> mu, kappa = np.array([0, 0, 1]), 20
- >>> samples = vonmises_fisher(mu, kappa).rvs(1000, random_state=rng)
- >>> mu_fit, kappa_fit = vonmises_fisher.fit(samples)
- >>> mu_fit, kappa_fit
- (array([0.01126519, 0.01044501, 0.99988199]), 19.306398751730995)
- We see that the estimated parameters `mu_fit` and `kappa_fit` are
- very close to the ground truth parameters.
- """
- def __init__(self, seed=None):
- super().__init__(seed)
- def __call__(self, mu=None, kappa=1, seed=None):
- """Create a frozen von Mises-Fisher distribution.
- See `vonmises_fisher_frozen` for more information.
- """
- return vonmises_fisher_frozen(mu, kappa, seed=seed)
- def _process_parameters(self, mu, kappa):
- """
- Infer dimensionality from mu and ensure that mu is a one-dimensional
- unit vector and kappa positive.
- """
- mu = np.asarray(mu)
- if mu.ndim > 1:
- raise ValueError("'mu' must have one-dimensional shape.")
- if not np.allclose(np.linalg.norm(mu), 1.):
- raise ValueError("'mu' must be a unit vector of norm 1.")
- if not mu.size > 1:
- raise ValueError("'mu' must have at least two entries.")
- kappa_error_msg = "'kappa' must be a positive scalar."
- if not np.isscalar(kappa) or kappa < 0:
- raise ValueError(kappa_error_msg)
- if float(kappa) == 0.:
- raise ValueError("For 'kappa=0' the von Mises-Fisher distribution "
- "becomes the uniform distribution on the sphere "
- "surface. Consider using "
- "'scipy.stats.uniform_direction' instead.")
- dim = mu.size
- return dim, mu, kappa
- def _check_data_vs_dist(self, x, dim):
- if x.shape[-1] != dim:
- raise ValueError("The dimensionality of the last axis of 'x' must "
- "match the dimensionality of the "
- "von Mises Fisher distribution.")
- if not np.allclose(np.linalg.norm(x, axis=-1), 1.):
- msg = "'x' must be unit vectors of norm 1 along last dimension."
- raise ValueError(msg)
- def _log_norm_factor(self, dim, kappa):
- # normalization factor is given by
- # c = kappa**(dim/2-1)/((2*pi)**(dim/2)*I[dim/2-1](kappa))
- # = kappa**(dim/2-1)*exp(-kappa) /
- # ((2*pi)**(dim/2)*I[dim/2-1](kappa)*exp(-kappa)
- # = kappa**(dim/2-1)*exp(-kappa) /
- # ((2*pi)**(dim/2)*ive[dim/2-1](kappa)
- # Then the log is given by
- # log c = 1/2*(dim -1)*log(kappa) - kappa - -1/2*dim*ln(2*pi) -
- # ive[dim/2-1](kappa)
- halfdim = 0.5 * dim
- return (0.5 * (dim - 2)*np.log(kappa) - halfdim * _LOG_2PI -
- np.log(ive(halfdim - 1, kappa)) - kappa)
- def _logpdf(self, x, dim, mu, kappa):
- """Log of the von Mises-Fisher probability density function.
- As this function does no argument checking, it should not be
- called directly; use 'logpdf' instead.
- """
- x = np.asarray(x)
- self._check_data_vs_dist(x, dim)
- dotproducts = np.einsum('i,...i->...', mu, x)
- return self._log_norm_factor(dim, kappa) + kappa * dotproducts
- def logpdf(self, x, mu=None, kappa=1):
- """Log of the von Mises-Fisher probability density function.
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the log of the probability
- density function. The last axis of `x` must correspond
- to unit vectors of the same dimensionality as the distribution.
- mu : array_like, default: None
- Mean direction of the distribution. Must be a one-dimensional unit
- vector of norm 1.
- kappa : float, default: 1
- Concentration parameter. Must be positive.
- Returns
- -------
- logpdf : ndarray or scalar
- Log of the probability density function evaluated at `x`.
- """
- dim, mu, kappa = self._process_parameters(mu, kappa)
- return self._logpdf(x, dim, mu, kappa)
- def pdf(self, x, mu=None, kappa=1):
- """Von Mises-Fisher probability density function.
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the probability
- density function. The last axis of `x` must correspond
- to unit vectors of the same dimensionality as the distribution.
- mu : array_like
- Mean direction of the distribution. Must be a one-dimensional unit
- vector of norm 1.
- kappa : float
- Concentration parameter. Must be positive.
- Returns
- -------
- pdf : ndarray or scalar
- Probability density function evaluated at `x`.
- """
- dim, mu, kappa = self._process_parameters(mu, kappa)
- return np.exp(self._logpdf(x, dim, mu, kappa))
- def _rvs_2d(self, mu, kappa, size, random_state):
- """
- In 2D, the von Mises-Fisher distribution reduces to the
- von Mises distribution which can be efficiently sampled by numpy.
- This method is much faster than the general rejection
- sampling based algorithm.
- """
- mean_angle = np.arctan2(mu[1], mu[0])
- angle_samples = random_state.vonmises(mean_angle, kappa, size=size)
- samples = np.stack([np.cos(angle_samples), np.sin(angle_samples)],
- axis=-1)
- return samples
- def _rvs_3d(self, kappa, size, random_state):
- """
- Generate samples from a von Mises-Fisher distribution
- with mu = [1, 0, 0] and kappa. Samples then have to be
- rotated towards the desired mean direction mu.
- This method is much faster than the general rejection
- sampling based algorithm.
- Reference: https://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf
- """
- if size is None:
- sample_size = 1
- else:
- sample_size = size
- # compute x coordinate acc. to equation from section 3.1
- x = random_state.random(sample_size)
- x = 1. + np.log(x + (1. - x) * np.exp(-2 * kappa))/kappa
- # (y, z) are random 2D vectors that only have to be
- # normalized accordingly. Then (x, y z) follow a VMF distribution
- temp = np.sqrt(1. - np.square(x))
- uniformcircle = _sample_uniform_direction(2, sample_size, random_state)
- samples = np.stack([x, temp * uniformcircle[..., 0],
- temp * uniformcircle[..., 1]],
- axis=-1)
- if size is None:
- samples = np.squeeze(samples)
- return samples
- def _rejection_sampling(self, dim, kappa, size, random_state):
- """
- Generate samples from an n-dimensional von Mises-Fisher distribution
- with mu = [1, 0, ..., 0] and kappa via rejection sampling.
- Samples then have to be rotated towards the desired mean direction mu.
- Reference: https://doi.org/10.1080/03610919408813161
- """
- dim_minus_one = dim - 1
- # calculate number of requested samples
- if size is not None:
- if not np.iterable(size):
- size = (size, )
- n_samples = math.prod(size)
- else:
- n_samples = 1
- # calculate envelope for rejection sampler (eq. 4)
- sqrt = np.sqrt(4 * kappa ** 2. + dim_minus_one ** 2)
- envelop_param = (-2 * kappa + sqrt) / dim_minus_one
- if envelop_param == 0:
- # the regular formula suffers from loss of precision for high
- # kappa. This can only be detected by checking for 0 here.
- # Workaround: expansion for sqrt variable
- # https://www.wolframalpha.com/input?i=sqrt%284*x%5E2%2Bd%5E2%29
- # e = (-2 * k + sqrt(k**2 + d**2)) / d
- # ~ (-2 * k + 2 * k + d**2/(4 * k) - d**4/(64 * k**3)) / d
- # = d/(4 * k) - d**3/(64 * k**3)
- envelop_param = (dim_minus_one/4 * kappa**-1.
- - dim_minus_one**3/64 * kappa**-3.)
- # reference step 0
- node = (1. - envelop_param) / (1. + envelop_param)
- # t = ln(1 - ((1-x)/(1+x))**2)
- # = ln(4 * x / (1+x)**2)
- # = ln(4) + ln(x) - 2*log1p(x)
- correction = (kappa * node + dim_minus_one
- * (np.log(4) + np.log(envelop_param)
- - 2 * np.log1p(envelop_param)))
- n_accepted = 0
- x = np.zeros((n_samples, ))
- halfdim = 0.5 * dim_minus_one
- # main loop
- while n_accepted < n_samples:
- # generate candidates acc. to reference step 1
- sym_beta = random_state.beta(halfdim, halfdim,
- size=n_samples - n_accepted)
- coord_x = (1 - (1 + envelop_param) * sym_beta) / (
- 1 - (1 - envelop_param) * sym_beta)
- # accept or reject: reference step 2
- # reformulation for numerical stability:
- # t = ln(1 - (1-x)/(1+x) * y)
- # = ln((1 + x - y +x*y)/(1 +x))
- accept_tol = random_state.random(n_samples - n_accepted)
- criterion = (
- kappa * coord_x
- + dim_minus_one * (np.log((1 + envelop_param - coord_x
- + coord_x * envelop_param) / (1 + envelop_param)))
- - correction) > np.log(accept_tol)
- accepted_iter = np.sum(criterion)
- x[n_accepted:n_accepted + accepted_iter] = coord_x[criterion]
- n_accepted += accepted_iter
- # concatenate x and remaining coordinates: step 3
- coord_rest = _sample_uniform_direction(dim_minus_one, n_accepted,
- random_state)
- coord_rest = np.einsum(
- '...,...i->...i', np.sqrt(1 - x ** 2), coord_rest)
- samples = np.concatenate([x[..., None], coord_rest], axis=1)
- # reshape output to (size, dim)
- if size is not None:
- samples = samples.reshape(size + (dim, ))
- else:
- samples = np.squeeze(samples)
- return samples
- def _rotate_samples(self, samples, mu, dim):
- """A QR decomposition is used to find the rotation that maps the
- north pole (1, 0,...,0) to the vector mu. This rotation is then
- applied to all samples.
- Parameters
- ----------
- samples: array_like, shape = [..., n]
- mu : array-like, shape=[n, ]
- Point to parametrise the rotation.
- Returns
- -------
- samples : rotated samples
- """
- base_point = np.zeros((dim, ))
- base_point[0] = 1.
- embedded = np.concatenate([mu[None, :], np.zeros((dim - 1, dim))])
- rotmatrix, _ = np.linalg.qr(np.transpose(embedded))
- if np.allclose(np.matmul(rotmatrix, base_point[:, None])[:, 0], mu):
- rotsign = 1
- else:
- rotsign = -1
- # apply rotation
- samples = np.einsum('ij,...j->...i', rotmatrix, samples) * rotsign
- return samples
- def _rvs(self, dim, mu, kappa, size, random_state):
- if dim == 2:
- samples = self._rvs_2d(mu, kappa, size, random_state)
- elif dim == 3:
- samples = self._rvs_3d(kappa, size, random_state)
- else:
- samples = self._rejection_sampling(dim, kappa, size,
- random_state)
- if dim != 2:
- samples = self._rotate_samples(samples, mu, dim)
- return samples
- def rvs(self, mu=None, kappa=1, size=1, random_state=None):
- """Draw random samples from a von Mises-Fisher distribution.
- Parameters
- ----------
- mu : array_like
- Mean direction of the distribution. Must be a one-dimensional unit
- vector of norm 1.
- kappa : float
- Concentration parameter. Must be positive.
- size : int or tuple of ints, optional
- Given a shape of, for example, (m,n,k), m*n*k samples are
- generated, and packed in an m-by-n-by-k arrangement.
- Because each sample is N-dimensional, the output shape
- is (m,n,k,N). If no shape is specified, a single (N-D)
- sample is returned.
- random_state : {None, int, np.random.RandomState, np.random.Generator},
- optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Returns
- -------
- rvs : ndarray
- Random variates of shape (`size`, `N`), where `N` is the
- dimension of the distribution.
- """
- dim, mu, kappa = self._process_parameters(mu, kappa)
- random_state = self._get_random_state(random_state)
- samples = self._rvs(dim, mu, kappa, size, random_state)
- return samples
- def _entropy(self, dim, kappa):
- halfdim = 0.5 * dim
- return (-self._log_norm_factor(dim, kappa) - kappa *
- ive(halfdim, kappa) / ive(halfdim - 1, kappa))
- def entropy(self, mu=None, kappa=1):
- """Compute the differential entropy of the von Mises-Fisher
- distribution.
- Parameters
- ----------
- mu : array_like, default: None
- Mean direction of the distribution. Must be a one-dimensional unit
- vector of norm 1.
- kappa : float, default: 1
- Concentration parameter. Must be positive.
- Returns
- -------
- h : scalar
- Entropy of the von Mises-Fisher distribution.
- """
- dim, _, kappa = self._process_parameters(mu, kappa)
- return self._entropy(dim, kappa)
- def fit(self, x):
- """Fit the von Mises-Fisher distribution to data.
- Parameters
- ----------
- x : array-like
- Data the distribution is fitted to. Must be two dimensional.
- The second axis of `x` must be unit vectors of norm 1 and
- determine the dimensionality of the fitted
- von Mises-Fisher distribution.
- Returns
- -------
- mu : ndarray
- Estimated mean direction.
- kappa : float
- Estimated concentration parameter.
- """
- # validate input data
- x = np.asarray(x)
- if x.ndim != 2:
- raise ValueError("'x' must be two dimensional.")
- if not np.allclose(np.linalg.norm(x, axis=-1), 1.):
- msg = "'x' must be unit vectors of norm 1 along last dimension."
- raise ValueError(msg)
- dim = x.shape[-1]
- # mu is simply the directional mean
- dirstats = directional_stats(x)
- mu = dirstats.mean_direction
- r = dirstats.mean_resultant_length
- # kappa is the solution to the equation:
- # r = I[dim/2](kappa) / I[dim/2 -1](kappa)
- # = I[dim/2](kappa) * exp(-kappa) / I[dim/2 -1](kappa) * exp(-kappa)
- # = ive(dim/2, kappa) / ive(dim/2 -1, kappa)
- halfdim = 0.5 * dim
- def solve_for_kappa(kappa):
- bessel_vals = ive([halfdim, halfdim - 1], kappa)
- return bessel_vals[0]/bessel_vals[1] - r
- root_res = root_scalar(solve_for_kappa, method="brentq",
- bracket=(1e-8, 1e9))
- kappa = root_res.root
- return mu, kappa
- vonmises_fisher = vonmises_fisher_gen()
- class vonmises_fisher_frozen(multi_rv_frozen):
- def __init__(self, mu=None, kappa=1, seed=None):
- """Create a frozen von Mises-Fisher distribution.
- Parameters
- ----------
- mu : array_like, default: None
- Mean direction of the distribution.
- kappa : float, default: 1
- Concentration parameter. Must be positive.
- seed : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- """
- self._dist = vonmises_fisher_gen(seed)
- self.dim, self.mu, self.kappa = (
- self._dist._process_parameters(mu, kappa)
- )
- def logpdf(self, x):
- """
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the log of the probability
- density function. The last axis of `x` must correspond
- to unit vectors of the same dimensionality as the distribution.
- Returns
- -------
- logpdf : ndarray or scalar
- Log of probability density function evaluated at `x`.
- """
- return self._dist._logpdf(x, self.dim, self.mu, self.kappa)
- def pdf(self, x):
- """
- Parameters
- ----------
- x : array_like
- Points at which to evaluate the log of the probability
- density function. The last axis of `x` must correspond
- to unit vectors of the same dimensionality as the distribution.
- Returns
- -------
- pdf : ndarray or scalar
- Probability density function evaluated at `x`.
- """
- return np.exp(self.logpdf(x))
- def rvs(self, size=1, random_state=None):
- """Draw random variates from the Von Mises-Fisher distribution.
- Parameters
- ----------
- size : int or tuple of ints, optional
- Given a shape of, for example, (m,n,k), m*n*k samples are
- generated, and packed in an m-by-n-by-k arrangement.
- Because each sample is N-dimensional, the output shape
- is (m,n,k,N). If no shape is specified, a single (N-D)
- sample is returned.
- random_state : {None, int, `numpy.random.Generator`,
- `numpy.random.RandomState`}, optional
- If `seed` is None (or `np.random`), the `numpy.random.RandomState`
- singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used,
- seeded with `seed`.
- If `seed` is already a ``Generator`` or ``RandomState`` instance
- then that instance is used.
- Returns
- -------
- rvs : ndarray or scalar
- Random variates of size (`size`, `N`), where `N` is the
- dimension of the distribution.
- """
- random_state = self._dist._get_random_state(random_state)
- return self._dist._rvs(self.dim, self.mu, self.kappa, size,
- random_state)
- def entropy(self):
- """
- Calculate the differential entropy of the von Mises-Fisher
- distribution.
- Returns
- -------
- h: float
- Entropy of the Von Mises-Fisher distribution.
- """
- return self._dist._entropy(self.dim, self.kappa)
- class normal_inverse_gamma_gen(multi_rv_generic):
- r"""Normal-inverse-gamma distribution.
- The normal-inverse-gamma distribution is the conjugate prior of a normal
- distribution with unknown mean and variance.
- Methods
- -------
- pdf(x, s2, mu=0, lmbda=1, a=1, b=1)
- Probability density function.
- logpdf(x, s2, mu=0, lmbda=1, a=1, b=1)
- Log of the probability density function.
- mean(mu=0, lmbda=1, a=1, b=1)
- Distribution mean.
- var(mu=0, lmbda=1, a=1, b=1)
- Distribution variance.
- rvs(mu=0, lmbda=1, a=1, b=1, size=None, random_state=None)
- Draw random samples.
- Parameters
- ----------
- mu, lmbda, a, b : array_like
- Shape parameters of the distribution. See notes.
- seed : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `seed` is `None`, the `~np.random.RandomState` singleton is used.
- If `seed` is an int, a new ``RandomState`` instance is used, seeded
- with seed.
- If `seed` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- See Also
- --------
- norm
- invgamma
- Notes
- -----
- The probability density function of `normal_inverse_gamma` is:
- .. math::
- f(x, \sigma^2; \mu, \lambda, \alpha, \beta) =
- \frac{\sqrt{\lambda}}{\sqrt{2 \pi \sigma^2}}
- \frac{\beta^\alpha}{\Gamma(\alpha)}
- \left( \frac{1}{\sigma^2} \right)^{\alpha + 1}
- \exp \left(- \frac{2 \beta + \lambda (x - \mu)^2} {2 \sigma^2} \right)
- where all parameters are real and finite, and :math:`\sigma^2 > 0`,
- :math:`\lambda > 0`, :math:`\alpha > 0`, and :math:`\beta > 0`.
- Methods ``normal_inverse_gamma.pdf`` and ``normal_inverse_gamma.logpdf``
- accept `x` and `s2` for arguments :math:`x` and :math:`\sigma^2`.
- All methods accept `mu`, `lmbda`, `a`, and `b` for shape parameters
- :math:`\mu`, :math:`\lambda`, :math:`\alpha`, and :math:`\beta`,
- respectively.
- .. versionadded:: 1.15
- References
- ----------
- .. [1] Normal-inverse-gamma distribution, Wikipedia,
- https://en.wikipedia.org/wiki/Normal-inverse-gamma_distribution
- Examples
- --------
- Suppose we wish to investigate the relationship between the
- normal-inverse-gamma distribution and the inverse gamma distribution.
- >>> import numpy as np
- >>> from scipy import stats
- >>> import matplotlib.pyplot as plt
- >>> rng = np.random.default_rng(527484872345)
- >>> mu, lmbda, a, b = 0, 1, 20, 20
- >>> norm_inv_gamma = stats.normal_inverse_gamma(mu, lmbda, a, b)
- >>> inv_gamma = stats.invgamma(a, scale=b)
- One approach is to compare the distribution of the `s2` elements of
- random variates against the PDF of an inverse gamma distribution.
- >>> _, s2 = norm_inv_gamma.rvs(size=10000, random_state=rng)
- >>> bins = np.linspace(s2.min(), s2.max(), 50)
- >>> plt.hist(s2, bins=bins, density=True, label='Frequency density')
- >>> s2 = np.linspace(s2.min(), s2.max(), 300)
- >>> plt.plot(s2, inv_gamma.pdf(s2), label='PDF')
- >>> plt.xlabel(r'$\sigma^2$')
- >>> plt.ylabel('Frequency density / PMF')
- >>> plt.show()
- Similarly, we can compare the marginal distribution of `s2` against
- an inverse gamma distribution.
- >>> from scipy.integrate import quad_vec
- >>> from scipy import integrate
- >>> s2 = np.linspace(0.5, 3, 6)
- >>> res = quad_vec(lambda x: norm_inv_gamma.pdf(x, s2), -np.inf, np.inf)[0]
- >>> np.allclose(res, inv_gamma.pdf(s2))
- True
- The sample mean is comparable to the mean of the distribution.
- >>> x, s2 = norm_inv_gamma.rvs(size=10000, random_state=rng)
- >>> x.mean(), s2.mean()
- (np.float64(-0.005254750127304425), np.float64(1.050438111436508))
- >>> norm_inv_gamma.mean()
- (np.float64(0.0), np.float64(1.0526315789473684))
- Similarly, for the variance:
- >>> x.var(ddof=1), s2.var(ddof=1)
- (np.float64(1.0546150578185023), np.float64(0.061829865266330754))
- >>> norm_inv_gamma.var()
- (np.float64(1.0526315789473684), np.float64(0.061557402277623886))
- """
- def rvs(self, mu=0, lmbda=1, a=1, b=1, size=None, random_state=None):
- """Draw random samples from the distribution.
- Parameters
- ----------
- mu, lmbda, a, b : array_like, optional
- Shape parameters. `lmbda`, `a`, and `b` must be greater
- than zero.
- size : int or tuple of ints, optional
- Shape of samples to draw.
- random_state : {None, int, np.random.RandomState, np.random.Generator}, optional
- Used for drawing random variates.
- If `random_state` is `None`, the `~np.random.RandomState` singleton is used.
- If `random_state` is an int, a new ``RandomState`` instance is used, seeded
- with `random_state`.
- If `random_state` is already a ``RandomState`` or ``Generator`` instance,
- then that object is used.
- Default is `None`.
- Returns
- -------
- x, s2 : ndarray
- Random variates.
- """
- random_state = self._get_random_state(random_state)
- s2 = invgamma(a, scale=b).rvs(size=size, random_state=random_state)
- scale = (s2 / lmbda)**0.5
- x = norm(loc=mu, scale=scale).rvs(size=size, random_state=random_state)
- dtype = np.result_type(1.0, mu, lmbda, a, b)
- return x.astype(dtype), s2.astype(dtype)
- def _logpdf(self, x, s2, mu, lmbda, a, b):
- t1 = 0.5 * (np.log(lmbda) - np.log(2 * np.pi * s2))
- t2 = a*np.log(b) - special.gammaln(a).astype(a.dtype)
- t3 = -(a + 1) * np.log(s2)
- t4 = -(2*b + lmbda*(x - mu)**2) / (2*s2)
- return t1 + t2 + t3 + t4
- def logpdf(self, x, s2, mu=0, lmbda=1, a=1, b=1):
- """Log of the probability density function.
- Parameters
- ----------
- x, s2 : array_like
- Arguments. `s2` must be greater than zero.
- mu, lmbda, a, b : array_like, optional
- Shape parameters. `lmbda`, `a`, and `b` must be greater
- than zero.
- Returns
- -------
- logpdf : ndarray or scalar
- Log of the probability density function.
- """
- invalid, args = self._process_parameters_pdf(x, s2, mu, lmbda, a, b)
- s2 = args[1]
- # Keep it simple for now; lazyselect later, perhaps.
- with np.errstate(all='ignore'):
- logpdf = np.asarray(self._logpdf(*args))
- logpdf[s2 <= 0] = -np.inf
- logpdf[invalid] = np.nan
- return logpdf[()]
- def _pdf(self, x, s2, mu, lmbda, a, b):
- t1 = np.sqrt(lmbda / (2 * np.pi * s2))
- t2 = b**a / special.gamma(a).astype(a.dtype)
- t3 = (1 / s2)**(a + 1)
- t4 = np.exp(-(2*b + lmbda*(x - mu)**2) / (2*s2))
- return t1 * t2 * t3 * t4
- def pdf(self, x, s2, mu=0, lmbda=1, a=1, b=1):
- """The probability density function.
- Parameters
- ----------
- x, s2 : array_like
- Arguments. `s2` must be greater than zero.
- mu, lmbda, a, b : array_like, optional
- Shape parameters. `lmbda`, `a`, and `b` must be greater
- than zero.
- Returns
- -------
- logpdf : ndarray or scalar
- The probability density function.
- """
- invalid, args = self._process_parameters_pdf(x, s2, mu, lmbda, a, b)
- s2 = args[1]
- # Keep it simple for now; lazyselect later, perhaps.
- with np.errstate(all='ignore'):
- pdf = np.asarray(self._pdf(*args))
- pdf[s2 <= 0] = 0
- pdf[invalid] = np.nan
- return pdf[()]
- def mean(self, mu=0, lmbda=1, a=1, b=1):
- """The mean of the distribution.
- Parameters
- ----------
- mu, lmbda, a, b : array_like, optional
- Shape parameters. `lmbda` and `b` must be greater
- than zero, and `a` must be greater than one.
- Returns
- -------
- x, s2 : ndarray
- The mean of the distribution.
- """
- invalid, args = self._process_shapes(mu, lmbda, a, b)
- mu, lmbda, a, b = args
- invalid |= ~(a > 1)
- mean_x = np.asarray(mu).copy()
- mean_s2 = np.asarray(b / (a - 1))
- mean_x[invalid] = np.nan
- mean_s2[invalid] = np.nan
- return mean_x[()], mean_s2[()]
- def var(self, mu=0, lmbda=1, a=1, b=1):
- """The variance of the distribution.
- Parameters
- ----------
- mu, lmbda, a, b : array_like, optional
- Shape parameters. `lmbda` and `b` must be greater
- than zero, and `a` must be greater than two.
- Returns
- -------
- x, s2 : ndarray
- The variance of the distribution.
- """
- invalid, args = self._process_shapes(mu, lmbda, a, b)
- mu, lmbda, a, b = args
- invalid_x = invalid | ~(a > 1)
- invalid_s2 = invalid | ~(a > 2)
- var_x = b / ((a - 1) * lmbda)
- var_s2 = b**2 / ((a - 1)**2 * (a - 2))
- var_x, var_s2 = np.asarray(var_x), np.asarray(var_s2)
- var_x[invalid_x] = np.nan
- var_s2[invalid_s2] = np.nan
- return var_x[()], var_s2[()]
- def _process_parameters_pdf(self, x, s2, mu, lmbda, a, b):
- args = np.broadcast_arrays(x, s2, mu, lmbda, a, b)
- dtype = np.result_type(1.0, *(arg.dtype for arg in args))
- args = [arg.astype(dtype, copy=False) for arg in args]
- x, s2, mu, lmbda, a, b = args
- invalid = ~((lmbda > 0) & (a > 0) & (b > 0))
- return invalid, args
- def _process_shapes(self, mu, lmbda, a, b):
- args = np.broadcast_arrays(mu, lmbda, a, b)
- dtype = np.result_type(1.0, *(arg.dtype for arg in args))
- args = [arg.astype(dtype, copy=False) for arg in args]
- mu, lmbda, a, b = args
- invalid = ~((lmbda > 0) & (a > 0) & (b > 0))
- return invalid, args
- def __call__(self, mu=0, lmbda=1, a=1, b=1, seed=None):
- return normal_inverse_gamma_frozen(mu, lmbda, a, b, seed=seed)
- normal_inverse_gamma = normal_inverse_gamma_gen()
- class normal_inverse_gamma_frozen(multi_rv_frozen):
- def __init__(self, mu=0, lmbda=1, a=1, b=1, seed=None):
- self._dist = normal_inverse_gamma_gen(seed)
- self._shapes = mu, lmbda, a, b
- def logpdf(self, x, s2):
- return self._dist.logpdf(x, s2, *self._shapes)
- def pdf(self, x, s2):
- return self._dist.pdf(x, s2, *self._shapes)
- def mean(self):
- return self._dist.mean(*self._shapes)
- def var(self):
- return self._dist.var(*self._shapes)
- def rvs(self, size=None, random_state=None):
- return self._dist.rvs(*self._shapes, size=size, random_state=random_state)
- # Set frozen generator docstrings from corresponding docstrings in
- # normal_inverse_gamma_gen and fill in default strings in class docstrings
- for name in ['logpdf', 'pdf', 'mean', 'var', 'rvs']:
- method = normal_inverse_gamma_gen.__dict__[name]
- method_frozen = normal_inverse_gamma_frozen.__dict__[name]
- method_frozen.__doc__ = doccer.docformat(method.__doc__,
- mvn_docdict_noparams)
- method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
|