_multivariate.py 268 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049
  1. #
  2. # Author: Joris Vankerschaver 2013
  3. #
  4. import math
  5. import warnings
  6. import threading
  7. import types
  8. import numpy as np
  9. import scipy.linalg
  10. from scipy._lib import doccer
  11. from scipy.special import (gammaln, psi, multigammaln, xlogy, entr, betaln,
  12. ive, loggamma)
  13. from scipy import special
  14. import scipy._lib.array_api_extra as xpx
  15. from scipy._lib._util import check_random_state
  16. from scipy.linalg.blas import drot, get_blas_funcs
  17. from ._continuous_distns import norm, invgamma
  18. from ._discrete_distns import binom
  19. from . import _covariance, _rcont
  20. from ._qmvnt import _qmvt, _qmvn, _qauto
  21. from ._morestats import directional_stats
  22. from scipy.optimize import root_scalar
# Names exported by ``from <this module> import *``. Each entry is the name
# of a module-level object that is expected to be defined further down in
# this file (the definitions are outside the portion shown here).
__all__ = ['multivariate_normal',
           'matrix_normal',
           'dirichlet',
           'dirichlet_multinomial',
           'wishart',
           'invwishart',
           'multinomial',
           'special_ortho_group',
           'ortho_group',
           'random_correlation',
           'unitary_group',
           'multivariate_t',
           'multivariate_hypergeom',
           'random_table',
           'uniform_direction',
           'vonmises_fisher',
           'normal_inverse_gamma',
           'matrix_t']
  41. _LOG_2PI = np.log(2 * np.pi)
  42. _LOG_2 = np.log(2)
  43. _LOG_PI = np.log(np.pi)
  44. MVN_LOCK = threading.Lock()
  45. _doc_random_state = """\
  46. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  47. Used for drawing random variates.
  48. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  49. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  50. with seed.
  51. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  52. then that object is used.
  53. Default is `None`.
  54. """
  55. def _squeeze_output(out):
  56. """
  57. Remove single-dimensional entries from array and convert to scalar,
  58. if necessary.
  59. """
  60. out = out.squeeze()
  61. if out.ndim == 0:
  62. out = out[()]
  63. return out
  64. def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
  65. """Determine which eigenvalues are "small" given the spectrum.
  66. This is for compatibility across various linear algebra functions
  67. that should agree about whether or not a Hermitian matrix is numerically
  68. singular and what is its numerical matrix rank.
  69. This is designed to be compatible with scipy.linalg.pinvh.
  70. Parameters
  71. ----------
  72. spectrum : 1d ndarray
  73. Array of eigenvalues of a Hermitian matrix.
  74. cond, rcond : float, optional
  75. Cutoff for small eigenvalues.
  76. Singular values smaller than rcond * largest_eigenvalue are
  77. considered zero.
  78. If None or -1, suitable machine precision is used.
  79. Returns
  80. -------
  81. eps : float
  82. Magnitude cutoff for numerical negligibility.
  83. """
  84. if rcond is not None:
  85. cond = rcond
  86. if cond in [None, -1]:
  87. t = spectrum.dtype.char.lower()
  88. factor = {'f': 1E3, 'd': 1E6}
  89. cond = factor[t] * np.finfo(t).eps
  90. eps = cond * np.max(abs(spectrum))
  91. return eps
  92. def _pinv_1d(v, eps=1e-5):
  93. """A helper function for computing the pseudoinverse.
  94. Parameters
  95. ----------
  96. v : iterable of numbers
  97. This may be thought of as a vector of eigenvalues or singular values.
  98. eps : float
  99. Values with magnitude no greater than eps are considered negligible.
  100. Returns
  101. -------
  102. v_pinv : 1d float ndarray
  103. A vector of pseudo-inverted numbers.
  104. """
  105. return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
  106. def _validate_marginal_input(dimensions, multivariate_dims):
  107. """Determine if input dimensions can be marginalized.
  108. Parameters
  109. ----------
  110. dimensions : float, ndarray
  111. Input dimensions to be marginalized
  112. multivariate_dims : int
  113. Number of dimensions of multivariate distribution.
  114. Returns
  115. -------
  116. dims : ndarray
  117. Array of indices to marginalize
  118. """
  119. dims = np.copy(dimensions)
  120. dims = np.atleast_1d(dims)
  121. if len(dims) == 0:
  122. msg = "Cannot marginalize all dimensions."
  123. raise ValueError(msg)
  124. if not np.issubdtype(dims.dtype, np.integer):
  125. msg = ("Elements of `dimensions` must be integers - the indices "
  126. "of the marginal variables being retained.")
  127. raise ValueError(msg)
  128. original_dims = np.copy(dims)
  129. dims[dims < 0] += multivariate_dims
  130. if len(np.unique(dims)) != len(dims):
  131. msg = "All elements of `dimensions` must be unique."
  132. raise ValueError(msg)
  133. i_invalid = (dims < 0) | (dims >= multivariate_dims)
  134. if np.any(i_invalid):
  135. msg = (f"Dimensions {original_dims[i_invalid]} are invalid "
  136. f"for a distribution in {multivariate_dims} dimensions.")
  137. raise ValueError(msg)
  138. return dims
  139. class _PSD:
  140. """
  141. Compute coordinated functions of a symmetric positive semidefinite matrix.
  142. This class addresses two issues. Firstly it allows the pseudoinverse,
  143. the logarithm of the pseudo-determinant, and the rank of the matrix
  144. to be computed using one call to eigh instead of three.
  145. Secondly it allows these functions to be computed in a way
  146. that gives mutually compatible results.
  147. All of the functions are computed with a common understanding as to
  148. which of the eigenvalues are to be considered negligibly small.
  149. The functions are designed to coordinate with scipy.linalg.pinvh()
  150. but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
  151. Parameters
  152. ----------
  153. M : array_like
  154. Symmetric positive semidefinite matrix (2-D).
  155. cond, rcond : float, optional
  156. Cutoff for small eigenvalues.
  157. Singular values smaller than rcond * largest_eigenvalue are
  158. considered zero.
  159. If None or -1, suitable machine precision is used.
  160. lower : bool, optional
  161. Whether the pertinent array data is taken from the lower
  162. or upper triangle of M. (Default: lower)
  163. check_finite : bool, optional
  164. Whether to check that the input matrices contain only finite
  165. numbers. Disabling may give a performance gain, but may result
  166. in problems (crashes, non-termination) if the inputs do contain
  167. infinities or NaNs.
  168. allow_singular : bool, optional
  169. Whether to allow a singular matrix. (Default: True)
  170. Notes
  171. -----
  172. The arguments are similar to those of scipy.linalg.pinvh().
  173. """
  174. def __init__(self, M, cond=None, rcond=None, lower=True,
  175. check_finite=True, allow_singular=True):
  176. self._M = np.asarray(M)
  177. # Compute the symmetric eigendecomposition.
  178. # Note that eigh takes care of array conversion, chkfinite,
  179. # and assertion that the matrix is square.
  180. s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
  181. eps = _eigvalsh_to_eps(s, cond, rcond)
  182. if np.min(s) < -eps:
  183. msg = "The input matrix must be symmetric positive semidefinite."
  184. raise ValueError(msg)
  185. d = s[s > eps]
  186. if len(d) < len(s) and not allow_singular:
  187. msg = ("When `allow_singular is False`, the input matrix must be "
  188. "symmetric positive definite.")
  189. raise np.linalg.LinAlgError(msg)
  190. s_pinv = _pinv_1d(s, eps)
  191. U = np.multiply(u, np.sqrt(s_pinv))
  192. # Save the eigenvector basis, and tolerance for testing support
  193. self.eps = 1e3*eps
  194. self.V = u[:, s <= eps]
  195. # Initialize the eagerly precomputed attributes.
  196. self.rank = len(d)
  197. self.U = U
  198. self.log_pdet = np.sum(np.log(d))
  199. # Initialize attributes to be lazily computed.
  200. self._pinv = None
  201. def _support_mask(self, x):
  202. """
  203. Check whether x lies in the support of the distribution.
  204. """
  205. residual = np.linalg.norm(x @ self.V, axis=-1)
  206. in_support = residual < self.eps
  207. return in_support
  208. @property
  209. def pinv(self):
  210. if self._pinv is None:
  211. self._pinv = np.dot(self.U, self.U.T)
  212. return self._pinv
  213. class multi_rv_generic:
  214. """
  215. Class which encapsulates common functionality between all multivariate
  216. distributions.
  217. """
  218. def __init__(self, seed=None):
  219. super().__init__()
  220. self._random_state = check_random_state(seed)
  221. @property
  222. def random_state(self):
  223. """ Get or set the Generator object for generating random variates.
  224. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  225. singleton is used.
  226. If `seed` is an int, a new ``RandomState`` instance is used,
  227. seeded with `seed`.
  228. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  229. that instance is used.
  230. """
  231. return self._random_state
  232. @random_state.setter
  233. def random_state(self, seed):
  234. self._random_state = check_random_state(seed)
  235. def _get_random_state(self, random_state):
  236. if random_state is not None:
  237. return check_random_state(random_state)
  238. else:
  239. return self._random_state
  240. class multi_rv_frozen:
  241. """
  242. Class which encapsulates common functionality between all frozen
  243. multivariate distributions.
  244. """
  245. # generic type compatibility with scipy-stubs
  246. __class_getitem__ = classmethod(types.GenericAlias)
  247. @property
  248. def random_state(self):
  249. return self._dist._random_state
  250. @random_state.setter
  251. def random_state(self, seed):
  252. self._dist._random_state = check_random_state(seed)
# Docstring fragments for the multivariate normal distribution.  They are
# substituted for the ``%(name)s`` placeholders found in the docstrings of
# `multivariate_normal_gen`.

# Parameter descriptions used where the distribution parameters are
# arguments of the documented callable.
_mvn_doc_default_callparams = """\
mean : array_like, default: ``[0]``
    Mean of the distribution.
cov : array_like or `Covariance`, default: ``[1]``
    Symmetric positive (semi)definite covariance matrix of the distribution.
allow_singular : bool, default: ``False``
    Whether to allow a singular covariance matrix. This is ignored if `cov` is
    a `Covariance` object.
"""

# Explanatory note about the accepted forms of `mean` and `cov`.
_mvn_doc_callparams_note = """\
Setting the parameter `mean` to `None` is equivalent to having `mean`
be the zero-vector. The parameter `cov` can be a scalar, in which case
the covariance matrix is the identity times that value, a vector of
diagonal entries for the covariance matrix, a two-dimensional array_like,
or a `Covariance` object.
"""

# Replacement fragments that omit the parameter descriptions and refer the
# reader to the class definition instead.
_mvn_doc_frozen_callparams = ""

_mvn_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitution table that includes the full parameter documentation.
mvn_docdict_params = {
    '_mvn_doc_default_callparams': _mvn_doc_default_callparams,
    '_mvn_doc_callparams_note': _mvn_doc_callparams_note,
    '_doc_random_state': _doc_random_state
}

# Substitution table with the same keys but the abbreviated fragments.
mvn_docdict_noparams = {
    '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,
    '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,
    '_doc_random_state': _doc_random_state
}
  282. class multivariate_normal_gen(multi_rv_generic):
  283. r"""A multivariate normal random variable.
  284. The `mean` keyword specifies the mean. The `cov` keyword specifies the
  285. covariance matrix.
  286. Methods
  287. -------
  288. pdf(x, mean=None, cov=1, allow_singular=False)
  289. Probability density function.
  290. logpdf(x, mean=None, cov=1, allow_singular=False)
  291. Log of the probability density function.
  292. cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None)
  293. Cumulative distribution function.
  294. logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)
  295. Log of the cumulative distribution function.
  296. rvs(mean=None, cov=1, size=1, random_state=None)
  297. Draw random samples from a multivariate normal distribution.
  298. entropy(mean=None, cov=1)
  299. Compute the differential entropy of the multivariate normal.
  300. marginal(dimensions, mean=None, cov=1, allow_singular=False)
  301. Return a marginal multivariate normal distribution.
  302. fit(x, fix_mean=None, fix_cov=None)
  303. Fit a multivariate normal distribution to data.
  304. Parameters
  305. ----------
  306. %(_mvn_doc_default_callparams)s
  307. %(_doc_random_state)s
  308. Notes
  309. -----
  310. %(_mvn_doc_callparams_note)s
  311. The covariance matrix `cov` may be an instance of a subclass of
  312. `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular`
  313. is ignored.
  314. Otherwise, `cov` must be a symmetric positive semidefinite
  315. matrix when `allow_singular` is True; it must be (strictly) positive
  316. definite when `allow_singular` is False.
  317. Symmetry is not checked; only the lower triangular portion is used.
  318. The determinant and inverse of `cov` are computed
  319. as the pseudo-determinant and pseudo-inverse, respectively, so
  320. that `cov` does not need to have full rank.
  321. The probability density function for `multivariate_normal` is
  322. .. math::
  323. f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}
  324. \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),
  325. where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
  326. :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`,
  327. SciPy extends this definition according to [1]_.
  328. .. versionadded:: 0.14.0
  329. References
  330. ----------
  331. .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia,
  332. https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case
  333. Examples
  334. --------
  335. >>> import numpy as np
  336. >>> import matplotlib.pyplot as plt
  337. >>> from scipy.stats import multivariate_normal
  338. >>> x = np.linspace(0, 5, 10, endpoint=False)
  339. >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
  340. array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
  341. 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
  342. >>> fig1 = plt.figure()
  343. >>> ax = fig1.add_subplot(111)
  344. >>> ax.plot(x, y)
  345. >>> plt.show()
  346. Alternatively, the object may be called (as a function) to fix the mean
  347. and covariance parameters, returning a "frozen" multivariate normal
  348. random variable:
  349. >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
  350. >>> # Frozen object with the same methods but holding the given
  351. >>> # mean and covariance fixed.
  352. The input quantiles can be any shape of array, as long as the last
  353. axis labels the components. This allows us for instance to
  354. display the frozen pdf for a non-isotropic random variable in 2D as
  355. follows:
  356. >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
  357. >>> pos = np.dstack((x, y))
  358. >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
  359. >>> fig2 = plt.figure()
  360. >>> ax2 = fig2.add_subplot(111)
  361. >>> ax2.contourf(x, y, rv.pdf(pos))
  362. """ # noqa: E501
  363. def __init__(self, seed=None):
  364. super().__init__(seed)
  365. self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)
  366. def __call__(self, mean=None, cov=1, allow_singular=False, seed=None, **kwds):
  367. """Create a frozen multivariate normal distribution.
  368. See `multivariate_normal_frozen` for more information.
  369. """
  370. return multivariate_normal_frozen(mean, cov,
  371. allow_singular=allow_singular,
  372. seed=seed, **kwds)
  373. def _process_parameters(self, mean, cov, allow_singular=True):
  374. """
  375. Infer dimensionality from mean or covariance matrix, ensure that
  376. mean and covariance are full vector resp. matrix.
  377. """
  378. if isinstance(cov, _covariance.Covariance):
  379. return self._process_parameters_Covariance(mean, cov)
  380. else:
  381. # Before `Covariance` classes were introduced,
  382. # `multivariate_normal` accepted plain arrays as `cov` and used the
  383. # following input validation. To avoid disturbing the behavior of
  384. # `multivariate_normal` when plain arrays are used, we use the
  385. # original input validation here.
  386. dim, mean, cov = self._process_parameters_psd(None, mean, cov)
  387. # After input validation, some methods then processed the arrays
  388. # with a `_PSD` object and used that to perform computation.
  389. # To avoid branching statements in each method depending on whether
  390. # `cov` is an array or `Covariance` object, we always process the
  391. # array with `_PSD`, and then use wrapper that satisfies the
  392. # `Covariance` interface, `CovViaPSD`.
  393. psd = _PSD(cov, allow_singular=allow_singular)
  394. cov_object = _covariance.CovViaPSD(psd)
  395. return dim, mean, cov_object
  396. def _process_parameters_Covariance(self, mean, cov):
  397. dim = cov.shape[-1]
  398. mean = np.array([0.]) if mean is None else mean
  399. message = (f"`cov` represents a covariance matrix in {dim} dimensions,"
  400. f"and so `mean` must be broadcastable to shape {(dim,)}")
  401. try:
  402. mean = np.broadcast_to(mean, dim)
  403. except ValueError as e:
  404. raise ValueError(message) from e
  405. return dim, mean, cov
  406. def _process_parameters_psd(self, dim, mean, cov):
  407. # Try to infer dimensionality
  408. if dim is None:
  409. if mean is None:
  410. if cov is None:
  411. dim = 1
  412. else:
  413. cov = np.asarray(cov, dtype=float)
  414. if cov.ndim < 2:
  415. dim = 1
  416. else:
  417. dim = cov.shape[0]
  418. else:
  419. mean = np.asarray(mean, dtype=float)
  420. dim = mean.size
  421. else:
  422. if not np.isscalar(dim):
  423. raise ValueError("Dimension of random variable must be "
  424. "a scalar.")
  425. # Check input sizes and return full arrays for mean and cov if
  426. # necessary
  427. if mean is None:
  428. mean = np.zeros(dim)
  429. mean = np.asarray(mean, dtype=float)
  430. if cov is None:
  431. cov = 1.0
  432. cov = np.asarray(cov, dtype=float)
  433. if dim == 1:
  434. mean = mean.reshape(1)
  435. cov = cov.reshape(1, 1)
  436. if mean.ndim != 1 or mean.shape[0] != dim:
  437. raise ValueError(f"Array 'mean' must be a vector of length {dim}.")
  438. if cov.ndim == 0:
  439. cov = cov * np.eye(dim)
  440. elif cov.ndim == 1:
  441. cov = np.diag(cov)
  442. elif cov.ndim == 2 and cov.shape != (dim, dim):
  443. rows, cols = cov.shape
  444. if rows != cols:
  445. msg = ("Array 'cov' must be square if it is two dimensional,"
  446. f" but cov.shape = {str(cov.shape)}.")
  447. else:
  448. msg = (f"Dimension mismatch: array 'cov' is of shape {cov.shape}, "
  449. f"but 'mean' is a vector of length {len(mean)}.")
  450. raise ValueError(msg)
  451. elif cov.ndim > 2:
  452. raise ValueError(f"Array 'cov' must be at most two-dimensional, "
  453. f"but cov.ndim = {cov.ndim}")
  454. return dim, mean, cov
  455. def _process_quantiles(self, x, dim):
  456. """
  457. Adjust quantiles array so that last axis labels the components of
  458. each data point.
  459. """
  460. x = np.asarray(x, dtype=float)
  461. if x.ndim == 0:
  462. x = x[np.newaxis]
  463. elif x.ndim == 1:
  464. if dim == 1:
  465. x = x[:, np.newaxis]
  466. else:
  467. x = x[np.newaxis, :]
  468. return x
  469. def _logpdf(self, x, mean, cov_object):
  470. """Log of the multivariate normal probability density function.
  471. Parameters
  472. ----------
  473. x : ndarray
  474. Points at which to evaluate the log of the probability
  475. density function
  476. mean : ndarray
  477. Mean of the distribution
  478. cov_object : Covariance
  479. An object representing the Covariance matrix
  480. Notes
  481. -----
  482. As this function does no argument checking, it should not be
  483. called directly; use 'logpdf' instead.
  484. """
  485. log_det_cov, rank = cov_object.log_pdet, cov_object.rank
  486. dev = x - mean
  487. if dev.ndim > 1:
  488. log_det_cov = log_det_cov[..., np.newaxis]
  489. rank = rank[..., np.newaxis]
  490. maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1)
  491. return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
  492. def logpdf(self, x, mean=None, cov=1, allow_singular=False):
  493. """Log of the multivariate normal probability density function.
  494. Parameters
  495. ----------
  496. x : array_like
  497. Quantiles, with the last axis of `x` denoting the components.
  498. %(_mvn_doc_default_callparams)s
  499. Returns
  500. -------
  501. pdf : ndarray or scalar
  502. Log of the probability density function evaluated at `x`
  503. Notes
  504. -----
  505. %(_mvn_doc_callparams_note)s
  506. """
  507. params = self._process_parameters(mean, cov, allow_singular)
  508. dim, mean, cov_object = params
  509. x = self._process_quantiles(x, dim)
  510. out = self._logpdf(x, mean, cov_object)
  511. if np.any(cov_object.rank < dim):
  512. out_of_bounds = ~cov_object._support_mask(x-mean)
  513. out[out_of_bounds] = -np.inf
  514. return _squeeze_output(out)
  515. def pdf(self, x, mean=None, cov=1, allow_singular=False):
  516. """Multivariate normal probability density function.
  517. Parameters
  518. ----------
  519. x : array_like
  520. Quantiles, with the last axis of `x` denoting the components.
  521. %(_mvn_doc_default_callparams)s
  522. Returns
  523. -------
  524. pdf : ndarray or scalar
  525. Probability density function evaluated at `x`
  526. Notes
  527. -----
  528. %(_mvn_doc_callparams_note)s
  529. """
  530. params = self._process_parameters(mean, cov, allow_singular)
  531. dim, mean, cov_object = params
  532. x = self._process_quantiles(x, dim)
  533. out = np.exp(self._logpdf(x, mean, cov_object))
  534. if np.any(cov_object.rank < dim):
  535. out_of_bounds = ~cov_object._support_mask(x-mean)
  536. out[out_of_bounds] = 0.0
  537. return _squeeze_output(out)
  538. def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit, rng):
  539. """Multivariate normal cumulative distribution function.
  540. Parameters
  541. ----------
  542. x : ndarray
  543. Points at which to evaluate the cumulative distribution function.
  544. mean : ndarray
  545. Mean of the distribution
  546. cov : array_like
  547. Covariance matrix of the distribution
  548. maxpts : integer
  549. The maximum number of points to use for integration
  550. abseps : float
  551. Absolute error tolerance
  552. releps : float
  553. Relative error tolerance
  554. lower_limit : array_like, optional
  555. Lower limit of integration of the cumulative distribution function.
  556. Default is negative infinity. Must be broadcastable with `x`.
  557. rng : Generator
  558. an instance of ``np.random.Generator``, which is used internally
  559. for QMC integration.
  560. Notes
  561. -----
  562. As this function does no argument checking, it should not be
  563. called directly; use 'cdf' instead.
  564. .. versionadded:: 1.0.0
  565. """
  566. lower = (np.full(mean.shape, -np.inf)
  567. if lower_limit is None else lower_limit)
  568. # In 2d, _mvn.mvnun accepts input in which `lower` bound elements
  569. # are greater than `x`. Not so in other dimensions. Fix this by
  570. # ensuring that lower bounds are indeed lower when passed, then
  571. # set signs of resulting CDF manually.
  572. b, a = np.broadcast_arrays(x, lower)
  573. b, a = b - mean, a - mean # _qmvn only accepts zero mean
  574. i_swap = b < a
  575. signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
  576. a, b = a.copy(), b.copy()
  577. a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
  578. n = x.shape[-1]
  579. limits = np.concatenate((a, b), axis=-1)
  580. # qmvn expects 1-d arguments, so process points sequentially
  581. # XXX: if cov.ndim == 2 and limits.ndim == 1, can avoid apply_along_axis
  582. def func1d(limits):
  583. # res0 = _qmvn(maxpts, cov, limits[:n], limits[n:], rng)[0]
  584. res = _qauto(_qmvn, cov, limits[:n], limits[n:],
  585. rng, error=abseps, limit=maxpts, n_batches=10)
  586. return np.squeeze(res[0])
  587. out = np.apply_along_axis(func1d, -1, limits) * signs
  588. return _squeeze_output(out)
  589. def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
  590. abseps=1e-5, releps=1e-5, *, lower_limit=None, rng=None):
  591. """Log of the multivariate normal cumulative distribution function.
  592. Parameters
  593. ----------
  594. x : array_like
  595. Quantiles, with the last axis of `x` denoting the components.
  596. %(_mvn_doc_default_callparams)s
  597. maxpts : integer, optional
  598. The maximum number of points to use for integration
  599. (default ``1000000*dim``)
  600. abseps : float, optional
  601. Absolute error tolerance (default 1e-5)
  602. releps : float, optional
  603. Relative error tolerance (default 1e-5)
  604. lower_limit : array_like, optional
  605. Lower limit of integration of the cumulative distribution function.
  606. Default is negative infinity. Must be broadcastable with `x`.
  607. rng : Generator, optional
  608. an instance of ``np.random.Generator``, which is used internally
  609. for QMC integration.
  610. Returns
  611. -------
  612. cdf : ndarray or scalar
  613. Log of the cumulative distribution function evaluated at `x`
  614. Notes
  615. -----
  616. %(_mvn_doc_callparams_note)s
  617. .. versionadded:: 1.0.0
  618. """
  619. params = self._process_parameters(mean, cov, allow_singular)
  620. dim, mean, cov_object = params
  621. cov = cov_object.covariance
  622. x = self._process_quantiles(x, dim)
  623. if not maxpts:
  624. maxpts = 1000000 * dim
  625. rng = self._get_random_state(rng)
  626. cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit, rng)
  627. # the log of a negative real is complex, and cdf can be negative
  628. # if lower limit is greater than upper limit
  629. cdf = cdf + 0j if np.any(cdf < 0) else cdf
  630. out = np.log(cdf)
  631. return out
  632. def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
  633. abseps=1e-5, releps=1e-5, *, lower_limit=None, rng=None):
  634. """Multivariate normal cumulative distribution function.
  635. Parameters
  636. ----------
  637. x : array_like
  638. Quantiles, with the last axis of `x` denoting the components.
  639. %(_mvn_doc_default_callparams)s
  640. maxpts : integer, optional
  641. The maximum number of points to use for integration
  642. (default ``1000000*dim``)
  643. abseps : float, optional
  644. Absolute error tolerance (default 1e-5)
  645. releps : float, optional
  646. Relative error tolerance (default 1e-5)
  647. lower_limit : array_like, optional
  648. Lower limit of integration of the cumulative distribution function.
  649. Default is negative infinity. Must be broadcastable with `x`.
  650. rng : Generator, optional
  651. an instance of ``np.random.Generator``, which is used internally
  652. for QMC integration.
  653. Returns
  654. -------
  655. cdf : ndarray or scalar
  656. Cumulative distribution function evaluated at `x`
  657. Notes
  658. -----
  659. %(_mvn_doc_callparams_note)s
  660. .. versionadded:: 1.0.0
  661. """
  662. params = self._process_parameters(mean, cov, allow_singular)
  663. dim, mean, cov_object = params
  664. cov = cov_object.covariance
  665. x = self._process_quantiles(x, dim)
  666. if not maxpts:
  667. maxpts = 1000000 * dim
  668. rng = self._get_random_state(rng)
  669. out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit, rng)
  670. return out
  671. def rvs(self, mean=None, cov=1, size=1, random_state=None):
  672. """Draw random samples from a multivariate normal distribution.
  673. Parameters
  674. ----------
  675. %(_mvn_doc_default_callparams)s
  676. size : integer, optional
  677. Number of samples to draw (default 1).
  678. %(_doc_random_state)s
  679. Returns
  680. -------
  681. rvs : ndarray or scalar
  682. Random variates of size (`size`, `N`), where `N` is the
  683. dimension of the random variable.
  684. Notes
  685. -----
  686. %(_mvn_doc_callparams_note)s
  687. """
  688. dim, mean, cov_object = self._process_parameters(mean, cov)
  689. random_state = self._get_random_state(random_state)
  690. if isinstance(cov_object, _covariance.CovViaPSD):
  691. cov = cov_object.covariance
  692. out = random_state.multivariate_normal(mean, cov, size)
  693. out = _squeeze_output(out)
  694. else:
  695. size = size or tuple()
  696. if not np.iterable(size):
  697. size = (size,)
  698. shape = tuple(size) + (cov_object.shape[-1],)
  699. x = random_state.normal(size=shape)
  700. out = mean + cov_object.colorize(x)
  701. return out
  702. def entropy(self, mean=None, cov=1):
  703. """Compute the differential entropy of the multivariate normal.
  704. Parameters
  705. ----------
  706. %(_mvn_doc_default_callparams)s
  707. Returns
  708. -------
  709. h : scalar
  710. Entropy of the multivariate normal distribution
  711. Notes
  712. -----
  713. %(_mvn_doc_callparams_note)s
  714. """
  715. dim, mean, cov_object = self._process_parameters(mean, cov)
  716. return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)
  717. def fit(self, x, fix_mean=None, fix_cov=None):
  718. """Fit a multivariate normal distribution to data.
  719. Parameters
  720. ----------
  721. x : ndarray (m, n)
  722. Data the distribution is fitted to. Must have two axes.
  723. The first axis of length `m` represents the number of vectors
  724. the distribution is fitted to. The second axis of length `n`
  725. determines the dimensionality of the fitted distribution.
  726. fix_mean : ndarray(n, )
  727. Fixed mean vector. Must have length `n`.
  728. fix_cov: ndarray (n, n)
  729. Fixed covariance matrix. Must have shape ``(n, n)``.
  730. Returns
  731. -------
  732. mean : ndarray (n, )
  733. Maximum likelihood estimate of the mean vector
  734. cov : ndarray (n, n)
  735. Maximum likelihood estimate of the covariance matrix
  736. """
  737. # input validation for data to be fitted
  738. x = np.asarray(x)
  739. if x.ndim != 2:
  740. raise ValueError("`x` must be two-dimensional.")
  741. n_vectors, dim = x.shape
  742. # parameter estimation
  743. # reference: https://home.ttic.edu/~shubhendu/Slides/Estimation.pdf
  744. if fix_mean is not None:
  745. # input validation for `fix_mean`
  746. fix_mean = np.atleast_1d(fix_mean)
  747. if fix_mean.shape != (dim, ):
  748. msg = ("`fix_mean` must be a one-dimensional array the same "
  749. "length as the dimensionality of the vectors `x`.")
  750. raise ValueError(msg)
  751. mean = fix_mean
  752. else:
  753. mean = x.mean(axis=0)
  754. if fix_cov is not None:
  755. # input validation for `fix_cov`
  756. fix_cov = np.atleast_2d(fix_cov)
  757. # validate shape
  758. if fix_cov.shape != (dim, dim):
  759. msg = ("`fix_cov` must be a two-dimensional square array "
  760. "of same side length as the dimensionality of the "
  761. "vectors `x`.")
  762. raise ValueError(msg)
  763. # validate positive semidefiniteness
  764. # a trimmed down copy from _PSD
  765. s, u = scipy.linalg.eigh(fix_cov, lower=True, check_finite=True)
  766. eps = _eigvalsh_to_eps(s)
  767. if np.min(s) < -eps:
  768. msg = "`fix_cov` must be symmetric positive semidefinite."
  769. raise ValueError(msg)
  770. cov = fix_cov
  771. else:
  772. centered_data = x - mean
  773. cov = centered_data.T @ centered_data / n_vectors
  774. return mean, cov
  775. def marginal(self, dimensions, mean=None, cov=1, allow_singular=False):
  776. """Return a marginal multivariate normal distribution.
  777. Parameters
  778. ----------
  779. dimensions : int or 1-d array_like
  780. The dimensions of the multivariate distribution corresponding
  781. with the marginal variables, that is, the indices of the dimensions
  782. that are being retained. The other dimensions are marginalized out.
  783. %(_mvn_doc_default_callparams)s
  784. Returns
  785. -------
  786. marginal_multivariate_normal : multivariate_normal_frozen
  787. An object representing the marginal distribution.
  788. Notes
  789. -----
  790. %(_mvn_doc_callparams_note)s
  791. """
  792. params = self._process_parameters(mean, cov, allow_singular)
  793. n, mean, cov_object = params
  794. dims = _validate_marginal_input(dimensions, n)
  795. mean = mean[dims]
  796. cov = cov_object.covariance[np.ix_(dims, dims)]
  797. return multivariate_normal_frozen(mean, cov, allow_singular)
# Module-level generator instance exported as `multivariate_normal`; calling
# it returns a frozen distribution (`multivariate_normal_frozen`).
multivariate_normal = multivariate_normal_gen()
  class multivariate_normal_frozen(multi_rv_frozen):
      # Explicitly set to None (presumably to opt out of ``Class[...]``
      # subscripting provided by a base class -- confirm against
      # `multi_rv_frozen`).
      __class_getitem__ = None

      def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
                   maxpts=None, abseps=1e-5, releps=1e-5):
          """Create a frozen multivariate normal distribution.

          Parameters
          ----------
          mean : array_like, default: ``[0]``
              Mean of the distribution.
          cov : array_like, default: ``[1]``
              Symmetric positive (semi)definite covariance matrix of the
              distribution.
          allow_singular : bool, default: ``False``
              Whether to allow a singular covariance matrix.
          seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
              If `seed` is None (or `np.random`), the `numpy.random.RandomState`
              singleton is used.
              If `seed` is an int, a new ``RandomState`` instance is used,
              seeded with `seed`.
              If `seed` is already a ``Generator`` or ``RandomState`` instance
              then that instance is used.
          maxpts : integer, optional
              The maximum number of points to use for integration of the
              cumulative distribution function (default ``1000000*dim``)
          abseps : float, optional
              Absolute error tolerance for the cumulative distribution function
              (default 1e-5)
          releps : float, optional
              Relative error tolerance for the cumulative distribution function
              (default 1e-5)

          Examples
          --------
          When called with the default parameters, this will create a 1D random
          variable with mean 0 and covariance 1:

          >>> from scipy.stats import multivariate_normal
          >>> r = multivariate_normal()
          >>> r.mean
          array([ 0.])
          >>> r.cov
          array([[1.]])

          """ # numpy/numpydoc#87  # noqa: E501
          self._dist = multivariate_normal_gen(seed)
          processed = self._dist._process_parameters(mean, cov, allow_singular)
          self.dim, self.mean, self.cov_object = processed
          # The covariance object may itself permit singular matrices even
          # when the caller did not ask for it.
          self.allow_singular = (allow_singular
                                 or self.cov_object._allow_singular)
          # Any falsy `maxpts` (None, 0) selects the dimension-scaled default.
          self.maxpts = maxpts if maxpts else 1000000 * self.dim
          self.abseps = abseps
          self.releps = releps

      @property
      def cov(self):
          """Covariance matrix held by the frozen covariance object."""
          return self.cov_object.covariance

      def logpdf(self, x):
          quantiles = self._dist._process_quantiles(x, self.dim)
          log_density = self._dist._logpdf(quantiles, self.mean,
                                           self.cov_object)
          rank_deficient = np.any(self.cov_object.rank < self.dim)
          if rank_deficient:
              # Points outside the support of a singular distribution have
              # zero density.
              in_support = self.cov_object._support_mask(quantiles - self.mean)
              log_density[~in_support] = -np.inf
          return _squeeze_output(log_density)

      def pdf(self, x):
          return np.exp(self.logpdf(x))

      def logcdf(self, x, *, lower_limit=None, rng=None):
          prob = self.cdf(x, lower_limit=lower_limit, rng=rng)
          # the log of a negative real is complex, and cdf can be negative
          # if lower limit is greater than upper limit
          if np.any(prob < 0):
              prob = prob + 0j
          return np.log(prob)

      def cdf(self, x, *, lower_limit=None, rng=None):
          quantiles = self._dist._process_quantiles(x, self.dim)
          generator = self._dist._get_random_state(rng)
          prob = self._dist._cdf(quantiles, self.mean,
                                 self.cov_object.covariance,
                                 self.maxpts, self.abseps, self.releps,
                                 lower_limit, generator)
          return _squeeze_output(prob)

      def rvs(self, size=1, random_state=None):
          return self._dist.rvs(self.mean, self.cov_object, size, random_state)

      def entropy(self):
          """Computes the differential entropy of the multivariate normal.

          Returns
          -------
          h : scalar
              Entropy of the multivariate normal distribution

          """
          cov_object = self.cov_object
          return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)

      def marginal(self, dimensions):
          """Return the frozen marginal distribution over `dimensions`.

          Delegates to the generator's ``marginal`` with this object's mean,
          covariance object and ``allow_singular`` setting.
          """
          return self._dist.marginal(dimensions, self.mean,
                                     self.cov_object, self.allow_singular)
  # Set frozen generator docstrings from corresponding docstrings in
  # multivariate_normal_gen and fill in default strings in class docstrings
  #
  # Each listed generator method carries a template docstring. It is rendered
  # once with `mvn_docdict_noparams` for the frozen method and once with
  # `mvn_docdict_params` for the generator method itself.
  # NOTE(review): `marginal` is not in this list although its docstring
  # contains %(...)s placeholders, so they are left unformatted -- confirm
  # whether that is intentional.
  for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:
      method = multivariate_normal_gen.__dict__[name]
      method_frozen = multivariate_normal_frozen.__dict__[name]
      # Order matters: the frozen docstring must be built from the raw
      # template before the next statement overwrites it on the generator.
      method_frozen.__doc__ = doccer.docformat(method.__doc__,
                                               mvn_docdict_noparams)
      method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
  # Docstring fragments substituted into the %(...)s placeholders of the
  # matrix normal docstrings.
  _matnorm_doc_default_callparams = """\
  mean : array_like, optional
      Mean of the distribution (default: `None`)
  rowcov : array_like, optional
      Among-row covariance matrix of the distribution (default: ``1``)
  colcov : array_like, optional
      Among-column covariance matrix of the distribution (default: ``1``)
  """

  # Fixed: the original text read "will be be interpreted".
  _matnorm_doc_callparams_note = """\
  If `mean` is set to `None` then a matrix of zeros is used for the mean.
  The dimensions of this matrix are inferred from the shape of `rowcov` and
  `colcov`, if these are provided, or set to ``1`` if ambiguous.

  `rowcov` and `colcov` can be two-dimensional array_likes specifying the
  covariance matrices directly. Alternatively, a one-dimensional array will
  be interpreted as the entries of a diagonal matrix, and a scalar or
  zero-dimensional array will be interpreted as this value times the
  identity matrix.
  """

  # Frozen variants: the parameters are fixed at construction time, so the
  # per-call parameter documentation is replaced by a pointer to the class.
  _matnorm_doc_frozen_callparams = ""

  _matnorm_doc_frozen_callparams_note = """\
  See class definition for a detailed description of parameters."""

  # Substitutions for the generator (methods take distribution parameters).
  matnorm_docdict_params = {
      '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,
      '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,
      '_doc_random_state': _doc_random_state
  }

  # Substitutions for the frozen distribution (methods take no parameters).
  matnorm_docdict_noparams = {
      '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,
      '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,
      '_doc_random_state': _doc_random_state
  }
  class matrix_normal_gen(multi_rv_generic):
      r"""A matrix normal random variable.

      The `mean` keyword specifies the mean. The `rowcov` keyword specifies the
      among-row covariance matrix. The 'colcov' keyword specifies the
      among-column covariance matrix.

      Methods
      -------
      pdf(X, mean=None, rowcov=1, colcov=1)
          Probability density function.
      logpdf(X, mean=None, rowcov=1, colcov=1)
          Log of the probability density function.
      rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)
          Draw random samples.
      entropy(rowcov=1, colcov=1)
          Differential entropy.

      Parameters
      ----------
      %(_matnorm_doc_default_callparams)s
      %(_doc_random_state)s

      Notes
      -----
      %(_matnorm_doc_callparams_note)s

      The covariance matrices specified by `rowcov` and `colcov` must be
      (symmetric) positive definite. If the samples in `X` are
      :math:`m \times n`, then `rowcov` must be :math:`m \times m` and
      `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.

      The probability density function for `matrix_normal` is

      .. math::

          f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}
                 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}
                 (X-M)^T \right] \right),

      where :math:`M` is the mean, :math:`U` the among-row covariance matrix,
      :math:`V` the among-column covariance matrix.

      The `allow_singular` behaviour of the `multivariate_normal`
      distribution is not currently supported. Covariance matrices must be
      full rank.

      The `matrix_normal` distribution is closely related to the
      `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`
      (the vector formed by concatenating the columns of :math:`X`) has a
      multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`
      and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker
      product). Sampling and pdf evaluation are
      :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but
      :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,
      making this equivalent form algorithmically inefficient.

      .. versionadded:: 0.17.0

      Examples
      --------

      >>> import numpy as np
      >>> from scipy.stats import matrix_normal

      >>> M = np.arange(6).reshape(3,2); M
      array([[0, 1],
             [2, 3],
             [4, 5]])
      >>> U = np.diag([1,2,3]); U
      array([[1, 0, 0],
             [0, 2, 0],
             [0, 0, 3]])
      >>> V = 0.3*np.identity(2); V
      array([[ 0.3,  0. ],
             [ 0. ,  0.3]])
      >>> X = M + 0.1; X
      array([[ 0.1,  1.1],
             [ 2.1,  3.1],
             [ 4.1,  5.1]])
      >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
      0.023410202050005054

      >>> # Equivalent multivariate normal
      >>> from scipy.stats import multivariate_normal
      >>> vectorised_X = X.T.flatten()
      >>> equiv_mean = M.T.flatten()
      >>> equiv_cov = np.kron(V,U)
      >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)
      0.023410202050005054

      Alternatively, the object may be called (as a function) to fix the mean
      and covariance parameters, returning a "frozen" matrix normal
      random variable:

      >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1)
      >>> # Frozen object with the same methods but holding the given
      >>> # mean and covariance fixed.

      """

      def __init__(self, seed=None):
          super().__init__(seed)
          # Fill the placeholders of the class docstring on the instance.
          self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)

      def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):
          """Create a frozen matrix normal distribution.

          See `matrix_normal_frozen` for more information.

          """
          return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)

      def _process_parameters(self, mean, rowcov, colcov):
          """
          Infer dimensionality from mean or covariance matrices. Handle
          defaults. Ensure compatible dimensions.

          Parameters
          ----------
          mean : array_like or None
              Two-dimensional mean; if None, a zero matrix whose shape is
              taken from the processed covariances is used.
          rowcov, colcov : array_like
              Scalar (multiple of the identity), 1-D (diagonal entries) or
              2-D square covariance.

          Returns
          -------
          dims : tuple
              ``(numrows, numcols)``.
          mean, rowcov, colcov : ndarray
              Float arrays of shapes ``dims``, ``(numrows, numrows)`` and
              ``(numcols, numcols)``.

          Raises
          ------
          ValueError
              If a shape is invalid or the shapes are incompatible.
          """
          # Process mean
          if mean is not None:
              mean = np.asarray(mean, dtype=float)
              meanshape = mean.shape
              if len(meanshape) != 2:
                  raise ValueError("Array `mean` must be two dimensional.")
              # `meanshape` is a tuple: test membership. Comparing the tuple
              # itself with 0 is always False, which made this check dead.
              if 0 in meanshape:
                  raise ValueError("Array `mean` has invalid shape.")

          # Process among-row covariance
          rowcov = np.asarray(rowcov, dtype=float)
          if rowcov.ndim == 0:
              # A scalar scales the identity; its size comes from the mean
              # when there is one, otherwise it defaults to 1.
              if mean is not None:
                  rowcov = rowcov * np.identity(meanshape[0])
              else:
                  rowcov = rowcov * np.identity(1)
          elif rowcov.ndim == 1:
              rowcov = np.diag(rowcov)
          rowshape = rowcov.shape
          if len(rowshape) != 2:
              raise ValueError("`rowcov` must be a scalar or a 2D array.")
          if rowshape[0] != rowshape[1]:
              raise ValueError("Array `rowcov` must be square.")
          if rowshape[0] == 0:
              raise ValueError("Array `rowcov` has invalid shape.")
          numrows = rowshape[0]

          # Process among-column covariance
          colcov = np.asarray(colcov, dtype=float)
          if colcov.ndim == 0:
              if mean is not None:
                  colcov = colcov * np.identity(meanshape[1])
              else:
                  colcov = colcov * np.identity(1)
          elif colcov.ndim == 1:
              colcov = np.diag(colcov)
          colshape = colcov.shape
          if len(colshape) != 2:
              raise ValueError("`colcov` must be a scalar or a 2D array.")
          if colshape[0] != colshape[1]:
              raise ValueError("Array `colcov` must be square.")
          if colshape[0] == 0:
              raise ValueError("Array `colcov` has invalid shape.")
          numcols = colshape[0]

          # Ensure mean and covariances compatible
          if mean is not None:
              if meanshape[0] != numrows:
                  raise ValueError("Arrays `mean` and `rowcov` must have the "
                                   "same number of rows.")
              if meanshape[1] != numcols:
                  raise ValueError("Arrays `mean` and `colcov` must have the "
                                   "same number of columns.")
          else:
              mean = np.zeros((numrows, numcols))

          dims = (numrows, numcols)

          return dims, mean, rowcov, colcov

      def _process_quantiles(self, X, dims):
          """
          Adjust quantiles array so that last two axes labels the components of
          each data point.

          A single 2-D matrix gets a leading axis of length one. ValueError is
          raised if the last two axes of `X` do not equal `dims`.
          """
          X = np.asarray(X, dtype=float)
          if X.ndim == 2:
              X = X[np.newaxis, :]
          if X.shape[-2:] != dims:
              raise ValueError("The shape of array `X` is not compatible "
                               "with the distribution parameters.")
          return X

      def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,
                  col_prec_rt, log_det_colcov):
          """Log of the matrix normal probability density function.

          Parameters
          ----------
          dims : tuple
              Dimensions of the matrix variates
          X : ndarray
              Points at which to evaluate the log of the probability
              density function
          mean : ndarray
              Mean of the distribution
          row_prec_rt : ndarray
              A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)
              is the inverse of the among-row covariance matrix
          log_det_rowcov : float
              Logarithm of the determinant of the among-row covariance matrix
          col_prec_rt : ndarray
              A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)
              is the inverse of the among-column covariance matrix
          log_det_colcov : float
              Logarithm of the determinant of the among-column covariance matrix

          Notes
          -----
          As this function does no argument checking, it should not be
          called directly; use 'logpdf' instead.

          """
          numrows, numcols = dims
          # Move the column axis to the front so the row and column precision
          # factors can be applied with `np.dot` / `np.tensordot`.
          roll_dev = np.moveaxis(X-mean, -1, 0)
          scale_dev = np.tensordot(col_prec_rt.T,
                                   np.dot(roll_dev, row_prec_rt), 1)
          # Sum of squares over both matrix axes: the quadratic form in the
          # exponent of the density.
          maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)
          return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov
                         + numrows*log_det_colcov + maha)

      def logpdf(self, X, mean=None, rowcov=1, colcov=1):
          """Log of the matrix normal probability density function.

          Parameters
          ----------
          X : array_like
              Quantiles, with the last two axes of `X` denoting the components.
          %(_matnorm_doc_default_callparams)s

          Returns
          -------
          logpdf : ndarray
              Log of the probability density function evaluated at `X`

          Notes
          -----
          %(_matnorm_doc_callparams_note)s

          """
          dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
                                                                colcov)
          X = self._process_quantiles(X, dims)
          rowpsd = _PSD(rowcov, allow_singular=False)
          colpsd = _PSD(colcov, allow_singular=False)
          out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,
                             colpsd.log_pdet)
          return _squeeze_output(out)

      def pdf(self, X, mean=None, rowcov=1, colcov=1):
          """Matrix normal probability density function.

          Parameters
          ----------
          X : array_like
              Quantiles, with the last two axes of `X` denoting the components.
          %(_matnorm_doc_default_callparams)s

          Returns
          -------
          pdf : ndarray
              Probability density function evaluated at `X`

          Notes
          -----
          %(_matnorm_doc_callparams_note)s

          """
          return np.exp(self.logpdf(X, mean, rowcov, colcov))

      def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):
          """Draw random samples from a matrix normal distribution.

          Parameters
          ----------
          %(_matnorm_doc_default_callparams)s
          size : integer, optional
              Number of samples to draw (default 1).
          %(_doc_random_state)s

          Returns
          -------
          rvs : ndarray or scalar
              Random variates of size (`size`, `dims`), where `dims` is the
              dimension of the random matrices.

          Notes
          -----
          %(_matnorm_doc_callparams_note)s

          """
          size = int(size)
          dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
                                                                colcov)
          rowchol = scipy.linalg.cholesky(rowcov, lower=True)
          colchol = scipy.linalg.cholesky(colcov, lower=True)
          random_state = self._get_random_state(random_state)
          # We aren't generating standard normal variates with size=(size,
          # dims[0], dims[1]) directly to ensure random variates remain backwards
          # compatible. See https://github.com/scipy/scipy/pull/12312 for more
          # details.
          std_norm = random_state.standard_normal(
              size=(dims[1], size, dims[0])
          ).transpose(1, 2, 0)
          out = mean + np.einsum('jp,ipq,kq->ijk',
                                 rowchol, std_norm, colchol,
                                 optimize=True)
          if size == 1:
              # A single draw is returned without the leading sample axis.
              out = out.reshape(mean.shape)
          return out

      def entropy(self, rowcov=1, colcov=1):
          """Differential entropy of the matrix normal distribution.

          Parameters
          ----------
          rowcov : array_like, optional
              Among-row covariance matrix of the distribution (default: ``1``)
          colcov : array_like, optional
              Among-column covariance matrix of the distribution (default: ``1``)

          Returns
          -------
          entropy : float
              Entropy of the distribution

          Notes
          -----
          %(_matnorm_doc_callparams_note)s

          """
          # With ``mean=None`` the dimensions are inferred from the
          # covariances after they are converted to arrays. Reading
          # ``rowcov.shape`` up front failed for the documented scalar
          # defaults, for lists and for zero-dimensional arrays.
          dims, _, rowcov, colcov = self._process_parameters(None,
                                                             rowcov,
                                                             colcov)
          rowpsd = _PSD(rowcov, allow_singular=False)
          colpsd = _PSD(colcov, allow_singular=False)

          return self._entropy(dims, rowpsd.log_pdet, colpsd.log_pdet)

      def _entropy(self, dims, row_cov_logdet, col_cov_logdet):
          """Entropy from the dimensions and the covariance log-determinants.

          No argument checking is done; use `entropy` instead.
          """
          n, p = dims
          return (0.5 * n * p * (1 + _LOG_2PI) + 0.5 * p * row_cov_logdet +
                  0.5 * n * col_cov_logdet)
  # Module-level instance of the generator class, created with the default
  # seed; this is the object the docstring examples import as
  # ``from scipy.stats import matrix_normal``.
  matrix_normal = matrix_normal_gen()
  1226. class matrix_normal_frozen(multi_rv_frozen):
  1227. """
  1228. Create a frozen matrix normal distribution.
  1229. Parameters
  1230. ----------
  1231. %(_matnorm_doc_default_callparams)s
  1232. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  1233. If `seed` is `None` the `~np.random.RandomState` singleton is used.
  1234. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  1235. with seed.
  1236. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  1237. then that object is used.
  1238. Default is `None`.
  1239. Examples
  1240. --------
  1241. >>> import numpy as np
  1242. >>> from scipy.stats import matrix_normal
  1243. >>> distn = matrix_normal(mean=np.zeros((3,3)))
  1244. >>> X = distn.rvs(); X
  1245. array([[-0.02976962, 0.93339138, -0.09663178],
  1246. [ 0.67405524, 0.28250467, -0.93308929],
  1247. [-0.31144782, 0.74535536, 1.30412916]])
  1248. >>> distn.pdf(X)
  1249. 2.5160642368346784e-05
  1250. >>> distn.logpdf(X)
  1251. -10.590229595124615
  1252. """
  1253. __class_getitem__ = None
  1254. def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):
  1255. self._dist = matrix_normal_gen(seed)
  1256. self.dims, self.mean, self.rowcov, self.colcov = \
  1257. self._dist._process_parameters(mean, rowcov, colcov)
  1258. self.rowpsd = _PSD(self.rowcov, allow_singular=False)
  1259. self.colpsd = _PSD(self.colcov, allow_singular=False)
  1260. def logpdf(self, X):
  1261. X = self._dist._process_quantiles(X, self.dims)
  1262. out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,
  1263. self.rowpsd.log_pdet, self.colpsd.U,
  1264. self.colpsd.log_pdet)
  1265. return _squeeze_output(out)
  1266. def pdf(self, X):
  1267. return np.exp(self.logpdf(X))
  1268. def rvs(self, size=1, random_state=None):
  1269. return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,
  1270. random_state)
  1271. def entropy(self):
  1272. return self._dist._entropy(self.dims, self.rowpsd.log_pdet,
  1273. self.colpsd.log_pdet)
  # Set frozen generator docstrings from corresponding docstrings in
  # matrix_normal_gen and fill in default strings in class docstrings
  #
  # Each listed generator method carries a template docstring. It is rendered
  # once with `matnorm_docdict_noparams` for the frozen method and once with
  # `matnorm_docdict_params` for the generator method itself.
  for name in ['logpdf', 'pdf', 'rvs', 'entropy']:
      method = matrix_normal_gen.__dict__[name]
      method_frozen = matrix_normal_frozen.__dict__[name]
      # Order matters: the frozen docstring must be built from the raw
      # template before the next statement overwrites it on the generator.
      method_frozen.__doc__ = doccer.docformat(method.__doc__,
                                               matnorm_docdict_noparams)
      method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)
  # Docstring fragments substituted into the %(...)s placeholders of the
  # matrix t docstrings.
  _matt_doc_default_callparams = """\
  mean : array_like, optional
      Mean of the distribution (default: `None`)
  row_spread : array_like, optional
      Row-wise 2nd order raw central moment matrix (default: ``1``)
  col_spread : array_like, optional
      Column-wise 2nd order raw central moment matrix (default: ``1``)
  df : scalar, optional
      Degrees of freedom (default: ``1``)
  """

  # Fixed: the original text read "will be be interpreted".
  _matt_doc_callparams_note = """\
  If `mean` is set to `None` then a matrix of zeros is used for the mean.
  The dimensions of this matrix are inferred from the shape of `row_spread` and
  `col_spread`, if these are provided, or set to ``1`` if ambiguous.

  `row_spread` and `col_spread` can be two-dimensional array_likes specifying the
  spread matrices directly. Alternatively, a one-dimensional array will
  be interpreted as the entries of a diagonal matrix, and a scalar or
  zero-dimensional array will be interpreted as this value times the
  identity matrix.
  """

  # Frozen variants: the parameters are fixed at construction time, so the
  # per-call parameter documentation is replaced by a pointer to the class.
  _matt_doc_frozen_callparams = ""

  _matt_doc_frozen_callparams_note = """\
  See class definition for a detailed description of parameters."""

  # Substitutions for the generator (methods take distribution parameters).
  matrix_t_docdict_params = {
      "_matt_doc_default_callparams": _matt_doc_default_callparams,
      "_matt_doc_callparams_note": _matt_doc_callparams_note,
      "_doc_random_state": _doc_random_state,
  }

  # Substitutions for the frozen distribution (methods take no parameters).
  matrix_t_docdict_noparams = {
      "_matt_doc_default_callparams": _matt_doc_frozen_callparams,
      "_matt_doc_callparams_note": _matt_doc_frozen_callparams_note,
      "_doc_random_state": _doc_random_state,
  }
  1315. class matrix_t_gen(multi_rv_generic):
  1316. r"""A matrix t-random variable.
  1317. The `mean` keyword specifies the mean.
  1318. The `row_spread` keyword specifies the row-wise spread matrix.
  1319. The `col_spread` keyword specifies the column-wise spread matrix.
  1320. Methods
  1321. -------
  1322. pdf(X, mean=None, row_spread=1, col_spread=1, df=1)
  1323. Probability density function.
  1324. logpdf(X, mean=None, row_spread=1, col_spread=1, df=1)
  1325. Log of the probability density function.
  1326. rvs(mean=None, row_spread=1, col_spread=1, df=1, size=1, random_state=None)
  1327. Draw random samples.
  1328. Parameters
  1329. ----------
  1330. %(_matt_doc_default_callparams)s
  1331. %(_doc_random_state)s
  1332. Notes
  1333. -----
  1334. %(_matt_doc_callparams_note)s
  1335. The spread matrices specified by `row_spread` and `col_spread` must be
  1336. (symmetric) positive definite. If the samples in `X` have shape `(m,n)`
  1337. then `row_spread` must have shape `(m,m)` and `col_spread` must have shape `(n,n)`.
  1338. Spread matrices must be full rank.
  1339. The probability density function for `matrix_t` is
  1340. .. math::
  1341. f(X \vert \mathrm{M}, \Sigma, \Omega, \mathrm{df}) =
  1342. \frac{
  1343. \Gamma_n \left(
  1344. \frac{\mathrm{df} + m + n - 1}{2}
  1345. \right)
  1346. \left(
  1347. \det \left(
  1348. I_n + (X - \mathrm{M})^T \Sigma^{-1} (X - \mathrm{M}) \Omega^{-1}
  1349. \right)
  1350. \right)^{ -\frac{\mathrm{df} + m + n - 1}{2} }
  1351. }{
  1352. \Gamma_n \left(
  1353. \frac{\mathrm{df} + n - 1}{2}
  1354. \right)
  1355. \pi^{mn / 2}
  1356. \left( \det \Sigma \right)^{n/2}
  1357. \left( \det \Omega \right)^{m/2}
  1358. }
  1359. or, alternatively,
  1360. .. math::
  1361. f(X \vert \mathrm{M}, \Sigma, \Omega, \mathrm{df}) =
  1362. \frac{
  1363. \Gamma_m \left(
  1364. \frac{\mathrm{df} + m + n - 1}{2}
  1365. \right)
  1366. \left(
  1367. \det \left(
  1368. I_m + \Sigma^{-1} (X - \mathrm{M}) \Omega^{-1} (X - \mathrm{M})^T
  1369. \right)
  1370. \right)^{ -\frac{\mathrm{df} + m + n - 1}{2} }
  1371. }{
  1372. \Gamma_m \left(
  1373. \frac{\mathrm{df} + m - 1}{2}
  1374. \right)
  1375. \pi^{mn / 2}
  1376. \left( \det \Sigma \right)^{n/2}
  1377. \left( \det \Omega \right)^{m/2}
  1378. }
  1379. where :math:`\mathrm{M}` is the mean,
  1380. :math:`\Sigma` is the row-wise spread matrix,
  1381. :math:`\Omega` is the column-wise spread matrix,
  1382. :math:`\mathrm{df}` is the degrees of freedom,
  1383. and :math:`\Gamma_n` is the multivariate gamma function.
  1384. These equivalent formulations come from the identity
  1385. .. math::
  1386. \det\left( I_m + A B \right) = \det\left( I_n + B A \right)
  1387. for :math:`m \times n` arrays :math:`A` and :math:`B^T`
  1388. and the fact that
  1389. :math:`\gamma_n(\mathrm{df} + m) / \gamma_n(\mathrm{df})`
  1390. is equal to
  1391. :math:`\gamma_m(\mathrm{df} + n) / \gamma_m(\mathrm{df})`,
  1392. where
  1393. .. math::
  1394. \gamma_m(\mathrm{df}) = 2^{m(m-1)/2}
  1395. \Gamma_m\left( (\mathrm{df} + m - 1) / 2 \right)
  1396. denotes a normalized multivariate gamma function.
  1397. When :math:`\mathrm{df} = 1` this distribution is known as the matrix
  1398. variate Cauchy.
  1399. .. versionadded:: 1.17.0
  1400. References
  1401. ----------
  1402. .. [1] Gupta, A.K., & Nagar, D.K. (2000). Matrix Variate Distributions (1st ed.).
  1403. Chapman and Hall/CRC.
  1404. Examples
  1405. --------
  1406. >>> import numpy as np
  1407. >>> from scipy.stats import matrix_t
  1408. >>> M = np.arange(6).reshape(3,2)
  1409. >>> M
  1410. array([[0, 1],
  1411. [2, 3],
  1412. [4, 5]])
  1413. >>> Sigma = np.diag([1,2,3])
  1414. >>> Sigma
  1415. array([[1, 0, 0],
  1416. [0, 2, 0],
  1417. [0, 0, 3]])
  1418. >>> Omega = 0.3*np.identity(2)
  1419. >>> Omega
  1420. array([[ 0.3, 0. ],
  1421. [ 0. , 0.3]])
  1422. >>> X = M + 0.1
  1423. >>> X
  1424. array([[ 0.1, 1.1],
  1425. [ 2.1, 3.1],
  1426. [ 4.1, 5.1]])
  1427. >>> df = 3
  1428. >>> matrix_t.pdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
  1429. 0.9972880280135796
  1430. Alternatively, the object may be called (as a function) to fix the mean
  1431. and spread parameters, returning a "frozen" matrix t
  1432. random variable:
  1433. >>> rv = matrix_t(mean=None, row_spread=1, col_spread=1, df=1)
  1434. >>> # Frozen object with the same methods but holding the given
  1435. >>> # mean and spreads and degrees of freedom fixed.
  1436. """
  def __init__(self, seed=None):
      """Initialize the generator and fill in the class docstring template.

      `seed` is forwarded unchanged to the base-class initializer.
      """
      super().__init__(seed)
      template = self.__doc__
      # Expand the placeholders of the class docstring on this instance.
      self.__doc__ = scipy._lib.doccer.docformat(template,
                                                 matrix_t_docdict_params)
  def __call__(self, mean=None, row_spread=1, col_spread=1, df=None, seed=None):
      """Create a frozen matrix t distribution.

      See `matrix_t_frozen` for more information.
      """
      frozen = matrix_t_frozen(mean, row_spread, col_spread, df, seed)
      return frozen
  1447. def _process_parameters(self, mean, row_spread, col_spread, df):
  1448. """
  1449. Infer dimensionality from mean or covariance matrices.
  1450. Handle defaults. Ensure conformality.
  1451. Parameters
  1452. ----------
  1453. mean : ndarray, shape (m,n)
  1454. Mean of the distribution
  1455. row_spread : ndarray, shape (m,m)
  1456. Row-wise spread matrix
  1457. col_spread : ndarray, shape (n,n)
  1458. Column-wise spread matrix
  1459. df : float
  1460. Degrees of freedom
  1461. """
  1462. # Process mean
  1463. if mean is not None:
  1464. mean = np.asarray(mean, dtype=float)
  1465. meanshape = mean.shape
  1466. if 0 in meanshape:
  1467. raise ValueError("Array `mean` has invalid shape.")
  1468. if len(meanshape) != 2:
  1469. raise ValueError("Array `mean` must be 2D.")
  1470. # Process row-wise spread
  1471. row_spread = np.asarray(row_spread, dtype=float)
  1472. if row_spread.ndim == 0:
  1473. if mean is not None:
  1474. row_spread = row_spread * np.identity(meanshape[0])
  1475. else:
  1476. row_spread = row_spread * np.identity(1)
  1477. elif row_spread.ndim == 1:
  1478. row_spread = np.diag(row_spread)
  1479. rowshape = row_spread.shape
  1480. if 0 in rowshape:
  1481. raise ValueError("Array `row_spread` has invalid shape.")
  1482. if len(rowshape) != 2:
  1483. raise ValueError("Array `row_spread` must be a scalar or a 2D array.")
  1484. if rowshape[0] != rowshape[1]:
  1485. raise ValueError("Array `row_spread` must be square.")
  1486. numrows = rowshape[0]
  1487. # Process column-wise spread
  1488. col_spread = np.asarray(col_spread, dtype=float)
  1489. if col_spread.ndim == 0:
  1490. if mean is not None:
  1491. col_spread = col_spread * np.identity(meanshape[1])
  1492. else:
  1493. col_spread = col_spread * np.identity(1)
  1494. elif col_spread.ndim == 1:
  1495. col_spread = np.diag(col_spread)
  1496. colshape = col_spread.shape
  1497. if 0 in colshape:
  1498. raise ValueError("Array `col_spread` has invalid shape.")
  1499. if len(colshape) != 2:
  1500. raise ValueError("Array `col_spread` must be a scalar or a 2D array.")
  1501. if colshape[0] != colshape[1]:
  1502. raise ValueError("Array `col_spread` must be square.")
  1503. numcols = colshape[0]
  1504. # Ensure mean and spreads are conformal
  1505. if mean is not None:
  1506. if meanshape[0] != numrows:
  1507. raise ValueError(
  1508. "Arrays `mean` and `row_spread` must have the same number of rows."
  1509. )
  1510. if meanshape[1] != numcols:
  1511. raise ValueError(
  1512. "Arrays `mean` and `col_spread` must have the same number "
  1513. "of columns."
  1514. )
  1515. else:
  1516. mean = np.zeros((numrows, numcols))
  1517. dims = (numrows, numcols)
  1518. if df is None:
  1519. df = 1 # default to matrix variate Cauchy
  1520. elif not np.isscalar(df):
  1521. raise ValueError("Degrees of freedom must be a scalar.")
  1522. elif df <= 0:
  1523. raise ValueError("Degrees of freedom must be positive.")
  1524. return dims, mean, row_spread, col_spread, df
  1525. def _process_quantiles(self, X, dims):
  1526. """
  1527. Adjust quantiles array so that last two axes labels the component of
  1528. each data point.
  1529. """
  1530. X = np.asarray(X, dtype=float)
  1531. if X.ndim == 2:
  1532. X = X[np.newaxis, :]
  1533. if X.shape[-2:] != dims:
  1534. raise ValueError(
  1535. "The shape of array `X` is not conformal with "
  1536. "the distribution parameters."
  1537. )
  1538. return X
  1539. def _logpdf(
  1540. self,
  1541. dims,
  1542. X,
  1543. mean,
  1544. df,
  1545. invrow_spread,
  1546. invcol_spread,
  1547. logdetrow_spread,
  1548. logdetcol_spread,
  1549. ):
  1550. """
  1551. Log of the matrix t probability density function.
  1552. Parameters
  1553. ----------
  1554. dims : tuple
  1555. Dimensions of the matrix variates
  1556. X : ndarray, shape (m,n) (equal to `dims`)
  1557. Points at which to evaluate the log of the probability density function
  1558. mean : ndarray, shape (m,n)
  1559. Mean of the distribution
  1560. df : float
  1561. Degrees-of-freedom parameter
  1562. invrow_spread : ndarray, shape (m,m)
  1563. Inverse of the row-wise spread matrix
  1564. invcol_spread : ndarray, shape (n,n)
  1565. Inverse of the column-wise spread matrix
  1566. logdetrow_spread : float
  1567. Log-determinant of the row-wise spread matrix
  1568. detcol_spread : float
  1569. Log-determinant of the column-wise spread matrix
  1570. Notes
  1571. -----
  1572. As this function does no argument checking, it should not be
  1573. called directly; use `logpdf` instead.
  1574. """
  1575. m, n = dims
  1576. X_shape = X.shape
  1577. if X.ndim > 3:
  1578. X = X.reshape(-1, m, n)
  1579. X_centered = X - mean[np.newaxis, ...]
  1580. det_arg = np.identity(n) + np.einsum(
  1581. "nij,njk,nkl,nlp->nip",
  1582. X_centered.transpose(0, 2, 1),
  1583. invrow_spread[np.newaxis, ...],
  1584. X_centered,
  1585. invcol_spread[np.newaxis, ...],
  1586. optimize=True,
  1587. )
  1588. _, logdet = np.linalg.slogdet(det_arg)
  1589. log_d_mn = -((df + m + n - 1) / 2) * logdet
  1590. log_f_mn = (
  1591. scipy.special.multigammaln((df + m + n - 1) / 2, n)
  1592. - scipy.special.multigammaln((df + n - 1) / 2, n)
  1593. - (m * n / 2) * _LOG_PI
  1594. - (n / 2) * logdetrow_spread
  1595. - (m / 2) * logdetcol_spread
  1596. )
  1597. retval = log_d_mn + log_f_mn
  1598. if len(X_shape) > 3:
  1599. retval = retval.reshape(X_shape[:-2])
  1600. return retval
  1601. def logpdf(self, X, mean=None, row_spread=1, col_spread=1, df=1):
  1602. """Log of the matrix normal probability density function.
  1603. Parameters
  1604. ----------
  1605. X : array_like
  1606. Quantiles, with the last two axes of `X` denoting the components.
  1607. %(_matt_doc_default_callparams)s
  1608. Returns
  1609. -------
  1610. logpdf : ndarray
  1611. Log of the probability density function evaluated at `X`
  1612. Notes
  1613. -----
  1614. %(_matt_doc_callparams_note)s
  1615. Examples
  1616. -------
  1617. >>> import numpy as np
  1618. >>> from scipy.stats import matrix_t
  1619. >>> M = np.arange(6).reshape(3,2); M
  1620. array([[0, 1],
  1621. [2, 3],
  1622. [4, 5]])
  1623. >>> Sigma = np.diag([1,2,3]); Sigma
  1624. array([[1, 0, 0],
  1625. [0, 2, 0],
  1626. [0, 0, 3]])
  1627. >>> Omega = 0.3*np.identity(2); Omega
  1628. array([[ 0.3, 0. ],
  1629. [ 0. , 0.3]])
  1630. >>> X = M + 0.1; X
  1631. array([[ 0.1, 1.1],
  1632. [ 2.1, 3.1],
  1633. [ 4.1, 5.1]])
  1634. >>> df = 3; df
  1635. 3
  1636. >>> matrix_t.logpdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
  1637. -0.002715656044664061
  1638. """
  1639. dims, mean, row_spread, col_spread, df = self._process_parameters(
  1640. mean, row_spread, col_spread, df
  1641. )
  1642. X = self._process_quantiles(X, dims)
  1643. rowpsd = _PSD(row_spread, allow_singular=False)
  1644. colpsd = _PSD(col_spread, allow_singular=False)
  1645. invrow_spread = rowpsd.pinv
  1646. invcol_spread = colpsd.pinv
  1647. logdetrow_spread = rowpsd.log_pdet
  1648. logdetcol_spread = colpsd.log_pdet
  1649. out = self._logpdf(
  1650. dims,
  1651. X,
  1652. mean,
  1653. df,
  1654. invrow_spread,
  1655. invcol_spread,
  1656. logdetrow_spread,
  1657. logdetcol_spread,
  1658. )
  1659. return _squeeze_output(out)
  1660. def pdf(self, X, mean=None, row_spread=1, col_spread=1, df=1):
  1661. """Matrix t probability density function.
  1662. Parameters
  1663. ----------
  1664. X : array_like
  1665. Quantiles, with the last two axes of `X` denoting the components.
  1666. %(_matt_doc_default_callparams)s
  1667. Returns
  1668. -------
  1669. pdf : ndarray
  1670. Probability density function evaluated at `X`
  1671. Notes
  1672. -----
  1673. %(_matt_doc_callparams_note)s
  1674. Examples
  1675. --------
  1676. >>> import numpy as np
  1677. >>> from scipy.stats import matrix_t
  1678. >>> M = np.arange(6).reshape(3,2); M
  1679. array([[0, 1],
  1680. [2, 3],
  1681. [4, 5]])
  1682. >>> Sigma = np.diag([1,2,3]); Sigma
  1683. array([[1, 0, 0],
  1684. [0, 2, 0],
  1685. [0, 0, 3]])
  1686. >>> Omega = 0.3*np.identity(2); Omega
  1687. array([[ 0.3, 0. ],
  1688. [ 0. , 0.3]])
  1689. >>> X = M + 0.1; X
  1690. array([[ 0.1, 1.1],
  1691. [ 2.1, 3.1],
  1692. [ 4.1, 5.1]])
  1693. >>> df = 3; df
  1694. 3
  1695. >>> matrix_t.logpdf(X, mean=M, row_spread=Sigma, col_spread=Omega, df=df)
  1696. 0.9972880280135796
  1697. """
  1698. return np.exp(self.logpdf(X, mean, row_spread, col_spread, df))
  1699. def rvs(
  1700. self, mean=None, row_spread=1, col_spread=1, df=1, size=1, random_state=None
  1701. ) -> np.ndarray:
  1702. """Draw random samples from a matrix t distribution.
  1703. Parameters
  1704. ----------
  1705. %(_matt_doc_default_callparams)s
  1706. size : integer, optional
  1707. Number of samples to draw (default 1).
  1708. %(_doc_random_state)s
  1709. Returns
  1710. -------
  1711. rvs : ndarray or scalar
  1712. Random variates of size (`size`, `dims`), where `dims` is the
  1713. dimension of the random matrices.
  1714. Notes
  1715. -----
  1716. %(_matt_doc_callparams_note)s
  1717. This method takes advantage of the two equivalent expressions of the
  1718. probability density function. It samples a Cholesky factor of a
  1719. random variate of the appropriate inverse Wishart distribution using
  1720. the smaller of the row/column dimensions.
  1721. """
  1722. size = int(size)
  1723. dims, mean, row_spread, col_spread, df = self._process_parameters(
  1724. mean, row_spread, col_spread, df
  1725. )
  1726. random_state = self._get_random_state(random_state)
  1727. # see scipy.stats.matrix_normal.rvs
  1728. std_norm = random_state.standard_normal(
  1729. size=(dims[1], size, dims[0])
  1730. ).transpose(1, 2, 0)
  1731. if dims[0] <= dims[1]:
  1732. rowchol = _cholesky_invwishart_rvs(df, row_spread, size, random_state)
  1733. colchol = scipy.linalg.cholesky(col_spread, lower=True)[np.newaxis, ...]
  1734. else:
  1735. rowchol = scipy.linalg.cholesky(row_spread, lower=True)[np.newaxis, ...]
  1736. colchol = _cholesky_invwishart_rvs(df, col_spread, size, random_state)
  1737. t_raw = np.einsum("ijp,ipq,ikq->ijk", rowchol, std_norm, colchol, optimize=True)
  1738. t_centered = mean[np.newaxis, ...] + t_raw
  1739. if size == 1:
  1740. t_centered = t_centered.reshape(mean.shape)
  1741. return t_centered
# Module-level generator instance; this is the object the docstring examples
# import as ``from scipy.stats import matrix_t``.
matrix_t = matrix_t_gen()
  1743. class matrix_t_frozen:
  1744. def __init__(self, mean, row_spread, col_spread, df, seed=None):
  1745. self._dist = matrix_t_gen(seed)
  1746. self.dims, self.mean, self.row_spread, self.col_spread, self.df = (
  1747. self._dist._process_parameters(mean, row_spread, col_spread, df)
  1748. )
  1749. self._random_state = np.random.RandomState(seed)
  1750. self.rowpsd = _PSD(self.row_spread, allow_singular=False)
  1751. self.colpsd = _PSD(self.col_spread, allow_singular=False)
  1752. def logpdf(self, X):
  1753. X = self._dist._process_quantiles(X, self.dims)
  1754. rowpsd = _PSD(self.row_spread, allow_singular=False)
  1755. colpsd = _PSD(self.col_spread, allow_singular=False)
  1756. invrow_spread = rowpsd.pinv
  1757. invcol_spread = colpsd.pinv
  1758. logdetrow_spread = rowpsd.log_pdet
  1759. logdetcol_spread = colpsd.log_pdet
  1760. out = self._dist._logpdf(
  1761. self.dims,
  1762. X,
  1763. self.mean,
  1764. self.df,
  1765. invrow_spread,
  1766. invcol_spread,
  1767. logdetrow_spread,
  1768. logdetcol_spread,
  1769. )
  1770. return _squeeze_output(out)
  1771. def pdf(self, X):
  1772. return np.exp(self.logpdf(X))
  1773. def rvs(self, size=1, random_state=None):
  1774. return self._dist.rvs(
  1775. self.mean, self.row_spread, self.col_spread, self.df, size, random_state
  1776. )
# Set frozen generator docstrings from corresponding docstrings in
# matrix_t_gen and fill in default strings in class docstrings
for name in ["logpdf", "pdf", "rvs"]:
    method = matrix_t_gen.__dict__[name]
    method_frozen = matrix_t_frozen.__dict__[name]
    # Order matters: the frozen docstring is built from the generator's
    # docstring *before* that docstring is overwritten with its own
    # expanded version on the next statement.
    method_frozen.__doc__ = scipy._lib.doccer.docformat(
        method.__doc__, matrix_t_docdict_noparams
    )
    method.__doc__ = scipy._lib.doccer.docformat(
        method.__doc__, matrix_t_docdict_params
    )
  1788. def _cholesky_invwishart_rvs(
  1789. df: float, scale: np.ndarray, size: int, random_state: np.random.Generator
  1790. ) -> np.ndarray:
  1791. r"""Samples the lower Cholesky factor of a matrix following an inverse
  1792. Wishart distribution.
  1793. Notes
  1794. -----
  1795. Intended to be used *as a step in the process* for computing random variates
  1796. of a matrix t distribution :math:`\mathcal{T}_{m,n}` by appealing to its
  1797. alternative form as a matrix mixture
  1798. .. math::
  1799. \mathcal{T}_{m,n}( \mathrm{df}, \mathrm{M}, \Sigma, \Omega )
  1800. = \mathcal{N}_{m,n}(
  1801. \mathrm{M},
  1802. \mathcal{W}^{-1}_m(\mathrm{df} + m - 1, \Sigma),
  1803. \Omega
  1804. )
  1805. = \mathcal{N}_{m,n}(
  1806. \mathrm{M},
  1807. \Sigma,
  1808. \mathcal{W}^{-1}_n(\mathrm{df} - n + 1, \Omega)
  1809. )
  1810. where :math:`\mathcal{N}_{m,n}` is a matrix normal distribution
  1811. and :math:`\mathcal{W}^{-1}_d` is an inverse Wishart distribution.
  1812. Accordingly, the degrees of freedom adjustment
  1813. :math:`\mathrm{df} \to \mathrm{df} + d - 1`
  1814. occurrs in the scope of this function.
  1815. """
  1816. df_iw = df + scale.shape[0] - 1
  1817. iw_samples = scipy.stats.invwishart.rvs(df_iw, scale, size, random_state)
  1818. if size == 1:
  1819. iw_samples = iw_samples[np.newaxis, ...]
  1820. chol_samples = np.empty_like(iw_samples)
  1821. for idx in range(size):
  1822. chol_samples[idx] = scipy.linalg.cholesky(
  1823. iw_samples[idx], lower=True, check_finite=False
  1824. ).reshape(iw_samples.shape[1:])
  1825. return chol_samples.reshape((size, *scale.shape))
# Parameter description substituted into the dirichlet docstrings wherever
# the ``%(_dirichlet_doc_default_callparams)s`` placeholder appears.
_dirichlet_doc_default_callparams = """\
alpha : array_like
    The concentration parameters. The number of entries determines the
    dimensionality of the distribution.
"""

# The frozen distribution takes no call parameters, so the placeholder is
# replaced by an empty string there.
_dirichlet_doc_frozen_callparams = ""

_dirichlet_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitution table for `dirichlet_gen` docstrings.
dirichlet_docdict_params = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
    '_doc_random_state': _doc_random_state
}

# Substitution table for `dirichlet_frozen` docstrings.
dirichlet_docdict_noparams = {
    '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
    '_doc_random_state': _doc_random_state
}
  1842. def _dirichlet_check_parameters(alpha):
  1843. alpha = np.asarray(alpha)
  1844. if np.min(alpha) <= 0:
  1845. raise ValueError("All parameters must be greater than 0")
  1846. elif alpha.ndim != 1:
  1847. raise ValueError("Parameter vector 'a' must be one dimensional, "
  1848. f"but a.shape = {alpha.shape}.")
  1849. return alpha
  1850. def _dirichlet_check_input(alpha, x):
  1851. x = np.asarray(x)
  1852. if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
  1853. raise ValueError("Vector 'x' must have either the same number "
  1854. "of entries as, or one entry fewer than, "
  1855. f"parameter vector 'a', but alpha.shape = {alpha.shape} "
  1856. f"and x.shape = {x.shape}.")
  1857. if x.shape[0] != alpha.shape[0]:
  1858. xk = np.array([1 - np.sum(x, 0)])
  1859. if xk.ndim == 1:
  1860. x = np.append(x, xk)
  1861. elif xk.ndim == 2:
  1862. x = np.vstack((x, xk))
  1863. else:
  1864. raise ValueError("The input must be one dimensional or a two "
  1865. "dimensional matrix containing the entries.")
  1866. if np.min(x) < 0:
  1867. raise ValueError("Each entry in 'x' must be greater than or equal "
  1868. "to zero.")
  1869. if np.max(x) > 1:
  1870. raise ValueError("Each entry in 'x' must be smaller or equal one.")
  1871. # Check x_i > 0 or alpha_i > 1
  1872. xeq0 = (x == 0)
  1873. alphalt1 = (alpha < 1)
  1874. if x.shape != alpha.shape:
  1875. alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)
  1876. chk = np.logical_and(xeq0, alphalt1)
  1877. if np.sum(chk):
  1878. raise ValueError("Each entry in 'x' must be greater than zero if its "
  1879. "alpha is less than one.")
  1880. if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
  1881. raise ValueError("The input vector 'x' must lie within the normal "
  1882. f"simplex. but np.sum(x, 0) = {np.sum(x, 0)}.")
  1883. return x
  1884. def _lnB(alpha):
  1885. r"""Internal helper function to compute the log of the useful quotient.
  1886. .. math::
  1887. B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)}
  1888. {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}
  1889. Parameters
  1890. ----------
  1891. %(_dirichlet_doc_default_callparams)s
  1892. Returns
  1893. -------
  1894. B : scalar
  1895. Helper quotient, internal use only
  1896. """
  1897. return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
  1898. class dirichlet_gen(multi_rv_generic):
  1899. r"""A Dirichlet random variable.
  1900. The ``alpha`` keyword specifies the concentration parameters of the
  1901. distribution.
  1902. .. versionadded:: 0.15.0
  1903. Methods
  1904. -------
  1905. pdf(x, alpha)
  1906. Probability density function.
  1907. logpdf(x, alpha)
  1908. Log of the probability density function.
  1909. rvs(alpha, size=1, random_state=None)
  1910. Draw random samples from a Dirichlet distribution.
  1911. mean(alpha)
  1912. The mean of the Dirichlet distribution
  1913. var(alpha)
  1914. The variance of the Dirichlet distribution
  1915. cov(alpha)
  1916. The covariance of the Dirichlet distribution
  1917. entropy(alpha)
  1918. Compute the differential entropy of the Dirichlet distribution.
  1919. Parameters
  1920. ----------
  1921. %(_dirichlet_doc_default_callparams)s
  1922. %(_doc_random_state)s
  1923. Notes
  1924. -----
  1925. Each :math:`\alpha` entry must be positive. The distribution has only
  1926. support on the simplex defined by
  1927. .. math::
  1928. \sum_{i=1}^{K} x_i = 1
  1929. where :math:`0 < x_i < 1`.
  1930. If the quantiles don't lie within the simplex, a ValueError is raised.
  1931. The probability density function for `dirichlet` is
  1932. .. math::
  1933. f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
  1934. where
  1935. .. math::
  1936. \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}
  1937. {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
  1938. and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the
  1939. concentration parameters and :math:`K` is the dimension of the space
  1940. where :math:`x` takes values.
  1941. Note that the `dirichlet` interface is somewhat inconsistent.
  1942. The array returned by the rvs function is transposed
  1943. with respect to the format expected by the pdf and logpdf.
  1944. Examples
  1945. --------
  1946. >>> import numpy as np
  1947. >>> from scipy.stats import dirichlet
  1948. Generate a dirichlet random variable
  1949. >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles
  1950. >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters
  1951. >>> dirichlet.pdf(quantiles, alpha)
  1952. 0.2843831684937255
  1953. The same PDF but following a log scale
  1954. >>> dirichlet.logpdf(quantiles, alpha)
  1955. -1.2574327653159187
  1956. Once we specify the dirichlet distribution
  1957. we can then calculate quantities of interest
  1958. >>> dirichlet.mean(alpha) # get the mean of the distribution
  1959. array([0.01960784, 0.24509804, 0.73529412])
  1960. >>> dirichlet.var(alpha) # get variance
  1961. array([0.00089829, 0.00864603, 0.00909517])
  1962. >>> dirichlet.entropy(alpha) # calculate the differential entropy
  1963. -4.3280162474082715
  1964. We can also return random samples from the distribution
  1965. >>> dirichlet.rvs(alpha, size=1, random_state=1)
  1966. array([[0.00766178, 0.24670518, 0.74563305]])
  1967. >>> dirichlet.rvs(alpha, size=2, random_state=2)
  1968. array([[0.01639427, 0.1292273 , 0.85437844],
  1969. [0.00156917, 0.19033695, 0.80809388]])
  1970. Alternatively, the object may be called (as a function) to fix
  1971. concentration parameters, returning a "frozen" Dirichlet
  1972. random variable:
  1973. >>> rv = dirichlet(alpha)
  1974. >>> # Frozen object with the same methods but holding the given
  1975. >>> # concentration parameters fixed.
  1976. """
  1977. def __init__(self, seed=None):
  1978. super().__init__(seed)
  1979. self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
  1980. def __call__(self, alpha, seed=None):
  1981. return dirichlet_frozen(alpha, seed=seed)
  1982. def _logpdf(self, x, alpha):
  1983. """Log of the Dirichlet probability density function.
  1984. Parameters
  1985. ----------
  1986. x : ndarray
  1987. Points at which to evaluate the log of the probability
  1988. density function
  1989. %(_dirichlet_doc_default_callparams)s
  1990. Notes
  1991. -----
  1992. As this function does no argument checking, it should not be
  1993. called directly; use 'logpdf' instead.
  1994. """
  1995. lnB = _lnB(alpha)
  1996. return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)
  1997. def logpdf(self, x, alpha):
  1998. """Log of the Dirichlet probability density function.
  1999. Parameters
  2000. ----------
  2001. x : array_like
  2002. Quantiles, with the last axis of `x` denoting the components.
  2003. %(_dirichlet_doc_default_callparams)s
  2004. Returns
  2005. -------
  2006. pdf : ndarray or scalar
  2007. Log of the probability density function evaluated at `x`.
  2008. """
  2009. alpha = _dirichlet_check_parameters(alpha)
  2010. x = _dirichlet_check_input(alpha, x)
  2011. out = self._logpdf(x, alpha)
  2012. return _squeeze_output(out)
  2013. def pdf(self, x, alpha):
  2014. """The Dirichlet probability density function.
  2015. Parameters
  2016. ----------
  2017. x : array_like
  2018. Quantiles, with the last axis of `x` denoting the components.
  2019. %(_dirichlet_doc_default_callparams)s
  2020. Returns
  2021. -------
  2022. pdf : ndarray or scalar
  2023. The probability density function evaluated at `x`.
  2024. """
  2025. alpha = _dirichlet_check_parameters(alpha)
  2026. x = _dirichlet_check_input(alpha, x)
  2027. out = np.exp(self._logpdf(x, alpha))
  2028. return _squeeze_output(out)
  2029. def mean(self, alpha):
  2030. """Mean of the Dirichlet distribution.
  2031. Parameters
  2032. ----------
  2033. %(_dirichlet_doc_default_callparams)s
  2034. Returns
  2035. -------
  2036. mu : ndarray or scalar
  2037. Mean of the Dirichlet distribution.
  2038. """
  2039. alpha = _dirichlet_check_parameters(alpha)
  2040. out = alpha / (np.sum(alpha))
  2041. return _squeeze_output(out)
  2042. def var(self, alpha):
  2043. """Variance of the Dirichlet distribution.
  2044. Parameters
  2045. ----------
  2046. %(_dirichlet_doc_default_callparams)s
  2047. Returns
  2048. -------
  2049. v : ndarray or scalar
  2050. Variance of the Dirichlet distribution.
  2051. """
  2052. alpha = _dirichlet_check_parameters(alpha)
  2053. alpha0 = np.sum(alpha)
  2054. out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
  2055. return _squeeze_output(out)
  2056. def cov(self, alpha):
  2057. """Covariance matrix of the Dirichlet distribution.
  2058. Parameters
  2059. ----------
  2060. %(_dirichlet_doc_default_callparams)s
  2061. Returns
  2062. -------
  2063. cov : ndarray
  2064. The covariance matrix of the distribution.
  2065. """
  2066. alpha = _dirichlet_check_parameters(alpha)
  2067. alpha0 = np.sum(alpha)
  2068. a = alpha / alpha0
  2069. cov = (np.diag(a) - np.outer(a, a)) / (alpha0 + 1)
  2070. return _squeeze_output(cov)
  2071. def entropy(self, alpha):
  2072. """
  2073. Differential entropy of the Dirichlet distribution.
  2074. Parameters
  2075. ----------
  2076. %(_dirichlet_doc_default_callparams)s
  2077. Returns
  2078. -------
  2079. h : scalar
  2080. Entropy of the Dirichlet distribution
  2081. """
  2082. alpha = _dirichlet_check_parameters(alpha)
  2083. alpha0 = np.sum(alpha)
  2084. lnB = _lnB(alpha)
  2085. K = alpha.shape[0]
  2086. out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
  2087. (alpha - 1) * scipy.special.psi(alpha))
  2088. return _squeeze_output(out)
  2089. def rvs(self, alpha, size=1, random_state=None):
  2090. """
  2091. Draw random samples from a Dirichlet distribution.
  2092. Parameters
  2093. ----------
  2094. %(_dirichlet_doc_default_callparams)s
  2095. size : int, optional
  2096. Number of samples to draw (default 1).
  2097. %(_doc_random_state)s
  2098. Returns
  2099. -------
  2100. rvs : ndarray or scalar
  2101. Random variates of size (`size`, `N`), where `N` is the
  2102. dimension of the random variable.
  2103. """
  2104. alpha = _dirichlet_check_parameters(alpha)
  2105. random_state = self._get_random_state(random_state)
  2106. return random_state.dirichlet(alpha, size=size)
# Module-level generator instance; this is the object the docstring examples
# import as ``from scipy.stats import dirichlet``.
dirichlet = dirichlet_gen()
class dirichlet_frozen(multi_rv_frozen):
    # Dirichlet distribution with fixed concentration parameters. Every
    # public method forwards to a private `dirichlet_gen`, supplying the
    # stored `alpha`. The method docstrings are assigned after the class
    # body from the matching `dirichlet_gen` methods.

    # NOTE(review): presumably disables ``cls[...]`` subscripting that the
    # base class would otherwise provide -- confirm against multi_rv_frozen.
    __class_getitem__ = None

    def __init__(self, alpha, seed=None):
        # Validate once so that `self.alpha` is a positive 1-D array.
        self.alpha = _dirichlet_check_parameters(alpha)
        self._dist = dirichlet_gen(seed)

    def logpdf(self, x):
        return self._dist.logpdf(x, self.alpha)

    def pdf(self, x):
        return self._dist.pdf(x, self.alpha)

    def mean(self):
        return self._dist.mean(self.alpha)

    def var(self):
        return self._dist.var(self.alpha)

    def cov(self):
        return self._dist.cov(self.alpha)

    def entropy(self):
        return self._dist.entropy(self.alpha)

    def rvs(self, size=1, random_state=None):
        return self._dist.rvs(self.alpha, size, random_state)
# Set frozen generator docstrings from corresponding docstrings in
# dirichlet_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'cov', 'entropy']:
    method = dirichlet_gen.__dict__[name]
    method_frozen = dirichlet_frozen.__dict__[name]
    # Order matters: the frozen docstring is derived from the generator's
    # docstring before that docstring is replaced by its expanded form.
    method_frozen.__doc__ = doccer.docformat(
        method.__doc__, dirichlet_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
# Parameter description substituted into the wishart docstrings wherever
# the ``%(_doc_default_callparams)s`` placeholder appears.
# NOTE(review): the text below says "greater than or equal to" the dimension,
# while `wishart_gen._process_parameters` only rejects ``df <= dim - 1`` --
# confirm which statement is intended.
_wishart_doc_default_callparams = """\
df : int
    Degrees of freedom, must be greater than or equal to dimension of the
    scale matrix
scale : array_like
    Symmetric positive definite scale matrix of the distribution
"""

_wishart_doc_callparams_note = ""

# The frozen distribution takes no call parameters, so the parameter
# placeholder is replaced by an empty string there.
_wishart_doc_frozen_callparams = ""

_wishart_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitution table for `wishart_gen` docstrings.
wishart_docdict_params = {
    '_doc_default_callparams': _wishart_doc_default_callparams,
    '_doc_callparams_note': _wishart_doc_callparams_note,
    '_doc_random_state': _doc_random_state
}

# Substitution table for the frozen distribution's docstrings.
wishart_docdict_noparams = {
    '_doc_default_callparams': _wishart_doc_frozen_callparams,
    '_doc_callparams_note': _wishart_doc_frozen_callparams_note,
    '_doc_random_state': _doc_random_state
}
  2156. class wishart_gen(multi_rv_generic):
  2157. r"""A Wishart random variable.
  2158. The `df` keyword specifies the degrees of freedom. The `scale` keyword
  2159. specifies the scale matrix, which must be symmetric and positive definite.
  2160. In this context, the scale matrix is often interpreted in terms of a
  2161. multivariate normal precision matrix (the inverse of the covariance
  2162. matrix). These arguments must satisfy the relationship
  2163. ``df > scale.ndim - 1``, but see notes on using the `rvs` method with
  2164. ``df < scale.ndim``.
  2165. Methods
  2166. -------
  2167. pdf(x, df, scale)
  2168. Probability density function.
  2169. logpdf(x, df, scale)
  2170. Log of the probability density function.
  2171. rvs(df, scale, size=1, random_state=None)
  2172. Draw random samples from a Wishart distribution.
  2173. entropy()
  2174. Compute the differential entropy of the Wishart distribution.
  2175. Parameters
  2176. ----------
  2177. %(_doc_default_callparams)s
  2178. %(_doc_random_state)s
  2179. Raises
  2180. ------
  2181. scipy.linalg.LinAlgError
  2182. If the scale matrix `scale` is not positive definite.
  2183. See Also
  2184. --------
  2185. invwishart, chi2
  2186. Notes
  2187. -----
  2188. %(_doc_callparams_note)s
  2189. The scale matrix `scale` must be a symmetric positive definite
  2190. matrix. Singular matrices, including the symmetric positive semi-definite
  2191. case, are not supported. Symmetry is not checked; only the lower triangular
  2192. portion is used.
  2193. The Wishart distribution is often denoted
  2194. .. math::
  2195. W_p(\nu, \Sigma)
  2196. where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the
  2197. :math:`p \times p` scale matrix.
  2198. The probability density function for `wishart` has support over positive
  2199. definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then
  2200. its PDF is given by:
  2201. .. math::
  2202. f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }
  2203. |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}
  2204. \exp\left( -tr(\Sigma^{-1} S) / 2 \right)
  2205. If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then
  2206. :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).
  2207. If the scale matrix is 1-dimensional and equal to one, then the Wishart
  2208. distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`
  2209. distribution.
  2210. The algorithm [2]_ implemented by the `rvs` method may
  2211. produce numerically singular matrices with :math:`p - 1 < \nu < p`; the
  2212. user may wish to check for this condition and generate replacement samples
  2213. as necessary.
  2214. .. versionadded:: 0.16.0
  2215. References
  2216. ----------
  2217. .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
  2218. Wiley, 1983.
  2219. .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate
  2220. Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.
  2221. Examples
  2222. --------
  2223. >>> import numpy as np
  2224. >>> import matplotlib.pyplot as plt
  2225. >>> from scipy.stats import wishart, chi2
  2226. >>> x = np.linspace(1e-5, 8, 100)
  2227. >>> w = wishart.pdf(x, df=3, scale=1); w[:5]
  2228. array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
  2229. >>> c = chi2.pdf(x, 3); c[:5]
  2230. array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
  2231. >>> plt.plot(x, w)
  2232. >>> plt.show()
  2233. The input quantiles can be any shape of array, as long as the last
  2234. axis labels the components.
  2235. Alternatively, the object may be called (as a function) to fix the degrees
  2236. of freedom and scale parameters, returning a "frozen" Wishart random
  2237. variable:
  2238. >>> rv = wishart(df=1, scale=1)
  2239. >>> # Frozen object with the same methods but holding the given
  2240. >>> # degrees of freedom and scale fixed.
  2241. """
def __init__(self, seed=None):
    # Hand `seed` to the generic base class, then expand the ``%(...)s``
    # placeholders of the class docstring for this instance.
    super().__init__(seed)
    self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
def __call__(self, df=None, scale=None, seed=None):
    """Create a frozen Wishart distribution.

    See `wishart_frozen` for more information.
    """
    # The arguments are forwarded unchanged; validation presumably happens
    # inside `wishart_frozen` -- TODO confirm.
    return wishart_frozen(df, scale, seed)
  2250. def _process_parameters(self, df, scale):
  2251. if scale is None:
  2252. scale = 1.0
  2253. scale = np.asarray(scale, dtype=float)
  2254. if scale.ndim == 0:
  2255. scale = scale[np.newaxis, np.newaxis]
  2256. elif scale.ndim == 1:
  2257. scale = np.diag(scale)
  2258. elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:
  2259. raise ValueError("Array 'scale' must be square if it is two dimensional,"
  2260. f" but scale.scale = {str(scale.shape)}.")
  2261. elif scale.ndim > 2:
  2262. raise ValueError(f"Array 'scale' must be at most two-dimensional, "
  2263. f"but scale.ndim = {scale.ndim}")
  2264. dim = scale.shape[0]
  2265. if df is None:
  2266. df = dim
  2267. elif not np.isscalar(df):
  2268. raise ValueError("Degrees of freedom must be a scalar.")
  2269. elif df <= dim - 1:
  2270. raise ValueError("Degrees of freedom must be greater than the "
  2271. "dimension of scale matrix minus 1.")
  2272. return dim, df, scale
  2273. def _process_quantiles(self, x, dim):
  2274. """
  2275. Adjust quantiles array so that last axis labels the components of
  2276. each data point.
  2277. """
  2278. x = np.asarray(x, dtype=float)
  2279. if x.ndim == 0:
  2280. x = x * np.eye(dim)[:, :, np.newaxis]
  2281. if x.ndim == 1:
  2282. if dim == 1:
  2283. x = x[np.newaxis, np.newaxis, :]
  2284. else:
  2285. x = np.diag(x)[:, :, np.newaxis]
  2286. elif x.ndim == 2:
  2287. if not x.shape[0] == x.shape[1]:
  2288. raise ValueError(
  2289. "Quantiles must be square if they are two dimensional,"
  2290. f" but x.shape = {str(x.shape)}.")
  2291. x = x[:, :, np.newaxis]
  2292. elif x.ndim == 3:
  2293. if not x.shape[0] == x.shape[1]:
  2294. raise ValueError(
  2295. "Quantiles must be square in the first two dimensions "
  2296. f"if they are three dimensional, but x.shape = {str(x.shape)}.")
  2297. elif x.ndim > 3:
  2298. raise ValueError(f"Quantiles must be at most two-dimensional with an "
  2299. f"additional dimension for multiple components, "
  2300. f"but x.ndim = {x.ndim}")
  2301. # Now we have 3-dim array; should have shape [dim, dim, *]
  2302. if not x.shape[0:2] == (dim, dim):
  2303. raise ValueError('Quantiles have incompatible dimensions: should'
  2304. f' be {(dim, dim)}, got {x.shape[0:2]}.')
  2305. return x
  2306. def _process_size(self, size):
  2307. size = np.asarray(size)
  2308. if size.ndim == 0:
  2309. size = size[np.newaxis]
  2310. elif size.ndim > 1:
  2311. raise ValueError('Size must be an integer or tuple of integers;'
  2312. ' thus must have dimension <= 1.'
  2313. f' Got size.ndim = {str(tuple(size))}')
  2314. n = size.prod()
  2315. shape = tuple(size)
  2316. return n, shape
  2317. def _logpdf(self, x, dim, df, scale, log_det_scale, C):
  2318. """Log of the Wishart probability density function.
  2319. Parameters
  2320. ----------
  2321. x : ndarray
  2322. Points at which to evaluate the log of the probability
  2323. density function
  2324. dim : int
  2325. Dimension of the scale matrix
  2326. df : int
  2327. Degrees of freedom
  2328. scale : ndarray
  2329. Scale matrix
  2330. log_det_scale : float
  2331. Logarithm of the determinant of the scale matrix
  2332. C : ndarray
  2333. Cholesky factorization of the scale matrix, lower triangular.
  2334. Notes
  2335. -----
  2336. As this function does no argument checking, it should not be
  2337. called directly; use 'logpdf' instead.
  2338. """
  2339. # log determinant of x
  2340. # Note: x has components along the last axis, so that x.T has
  2341. # components alone the 0-th axis. Then since det(A) = det(A'), this
  2342. # gives us a 1-dim vector of determinants
  2343. # Retrieve tr(scale^{-1} x)
  2344. log_det_x = np.empty(x.shape[-1])
  2345. scale_inv_x = np.empty(x.shape)
  2346. tr_scale_inv_x = np.empty(x.shape[-1])
  2347. for i in range(x.shape[-1]):
  2348. _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
  2349. scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
  2350. tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()
  2351. # Log PDF
  2352. out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
  2353. (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
  2354. multigammaln(0.5*df, dim)))
  2355. return out
def logpdf(self, x, df, scale):
    """Log of the Wishart probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
        Each quantile must be a symmetric positive definite matrix.
    %(_doc_default_callparams)s

    Returns
    -------
    logpdf : ndarray
        Log of the probability density function evaluated at `x`

    Notes
    -----
    %(_doc_callparams_note)s

    """
    dim, df, scale = self._process_parameters(df, scale)
    x = self._process_quantiles(x, dim)

    # Cholesky decomposition of scale, get log(det(scale))
    C, log_det_scale = self._cholesky_logdet(scale)

    out = self._logpdf(x, dim, df, scale, log_det_scale, C)
    return _squeeze_output(out)
  2378. def pdf(self, x, df, scale):
  2379. """Wishart probability density function.
  2380. Parameters
  2381. ----------
  2382. x : array_like
  2383. Quantiles, with the last axis of `x` denoting the components.
  2384. Each quantile must be a symmetric positive definite matrix.
  2385. %(_doc_default_callparams)s
  2386. Returns
  2387. -------
  2388. pdf : ndarray
  2389. Probability density function evaluated at `x`
  2390. Notes
  2391. -----
  2392. %(_doc_callparams_note)s
  2393. """
  2394. return np.exp(self.logpdf(x, df, scale))
  2395. def _mean(self, dim, df, scale):
  2396. """Mean of the Wishart distribution.
  2397. Parameters
  2398. ----------
  2399. dim : int
  2400. Dimension of the scale matrix
  2401. %(_doc_default_callparams)s
  2402. Notes
  2403. -----
  2404. As this function does no argument checking, it should not be
  2405. called directly; use 'mean' instead.
  2406. """
  2407. return df * scale
  2408. def mean(self, df, scale):
  2409. """Mean of the Wishart distribution.
  2410. Parameters
  2411. ----------
  2412. %(_doc_default_callparams)s
  2413. Returns
  2414. -------
  2415. mean : float
  2416. The mean of the distribution
  2417. """
  2418. dim, df, scale = self._process_parameters(df, scale)
  2419. out = self._mean(dim, df, scale)
  2420. return _squeeze_output(out)
  2421. def _mode(self, dim, df, scale):
  2422. """Mode of the Wishart distribution.
  2423. Parameters
  2424. ----------
  2425. dim : int
  2426. Dimension of the scale matrix
  2427. %(_doc_default_callparams)s
  2428. Notes
  2429. -----
  2430. As this function does no argument checking, it should not be
  2431. called directly; use 'mode' instead.
  2432. """
  2433. if df >= dim + 1:
  2434. out = (df-dim-1) * scale
  2435. else:
  2436. out = None
  2437. return out
  2438. def mode(self, df, scale):
  2439. """Mode of the Wishart distribution
  2440. Only valid if the degrees of freedom are greater than the dimension of
  2441. the scale matrix.
  2442. Parameters
  2443. ----------
  2444. %(_doc_default_callparams)s
  2445. Returns
  2446. -------
  2447. mode : float or None
  2448. The Mode of the distribution
  2449. """
  2450. dim, df, scale = self._process_parameters(df, scale)
  2451. out = self._mode(dim, df, scale)
  2452. return _squeeze_output(out) if out is not None else out
  2453. def _var(self, dim, df, scale):
  2454. """Variance of the Wishart distribution.
  2455. Parameters
  2456. ----------
  2457. dim : int
  2458. Dimension of the scale matrix
  2459. %(_doc_default_callparams)s
  2460. Notes
  2461. -----
  2462. As this function does no argument checking, it should not be
  2463. called directly; use 'var' instead.
  2464. """
  2465. var = scale**2
  2466. diag = scale.diagonal() # 1 x dim array
  2467. var += np.outer(diag, diag)
  2468. var *= df
  2469. return var
  2470. def var(self, df, scale):
  2471. """Variance of the Wishart distribution.
  2472. Parameters
  2473. ----------
  2474. %(_doc_default_callparams)s
  2475. Returns
  2476. -------
  2477. var : float
  2478. The variance of the distribution
  2479. """
  2480. dim, df, scale = self._process_parameters(df, scale)
  2481. out = self._var(dim, df, scale)
  2482. return _squeeze_output(out)
  2483. def _standard_rvs(self, n, shape, dim, df, random_state):
  2484. """
  2485. Parameters
  2486. ----------
  2487. n : integer
  2488. Number of variates to generate
  2489. shape : iterable
  2490. Shape of the variates to generate
  2491. dim : int
  2492. Dimension of the scale matrix
  2493. df : int
  2494. Degrees of freedom
  2495. random_state : {None, int, `numpy.random.Generator`,
  2496. `numpy.random.RandomState`}, optional
  2497. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2498. singleton is used.
  2499. If `seed` is an int, a new ``RandomState`` instance is used,
  2500. seeded with `seed`.
  2501. If `seed` is already a ``Generator`` or ``RandomState`` instance
  2502. then that instance is used.
  2503. Notes
  2504. -----
  2505. As this function does no argument checking, it should not be
  2506. called directly; use 'rvs' instead.
  2507. """
  2508. # Random normal variates for off-diagonal elements
  2509. n_tril = dim * (dim-1) // 2
  2510. covariances = random_state.normal(
  2511. size=n*n_tril).reshape(shape+(n_tril,))
  2512. # Random chi-square variates for diagonal elements
  2513. variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5
  2514. for i in range(dim)]].reshape((dim,) +
  2515. shape[::-1]).T)
  2516. # Create the A matri(ces) - lower triangular
  2517. A = np.zeros(shape + (dim, dim))
  2518. # Input the covariances
  2519. size_idx = tuple([slice(None, None, None)]*len(shape))
  2520. tril_idx = np.tril_indices(dim, k=-1)
  2521. A[size_idx + tril_idx] = covariances
  2522. # Input the variances
  2523. diag_idx = np.diag_indices(dim)
  2524. A[size_idx + diag_idx] = variances
  2525. return A
  2526. def _rvs(self, n, shape, dim, df, C, random_state):
  2527. """Draw random samples from a Wishart distribution.
  2528. Parameters
  2529. ----------
  2530. n : integer
  2531. Number of variates to generate
  2532. shape : iterable
  2533. Shape of the variates to generate
  2534. dim : int
  2535. Dimension of the scale matrix
  2536. df : int
  2537. Degrees of freedom
  2538. C : ndarray
  2539. Cholesky factorization of the scale matrix, lower triangular.
  2540. %(_doc_random_state)s
  2541. Notes
  2542. -----
  2543. As this function does no argument checking, it should not be
  2544. called directly; use 'rvs' instead.
  2545. """
  2546. random_state = self._get_random_state(random_state)
  2547. # Calculate the matrices A, which are actually lower triangular
  2548. # Cholesky factorizations of a matrix B such that B ~ W(df, I)
  2549. A = self._standard_rvs(n, shape, dim, df, random_state)
  2550. # Calculate SA = C A A' C', where SA ~ W(df, scale)
  2551. # Note: this is the product of a (lower) (lower) (lower)' (lower)'
  2552. # or, denoting B = AA', it is C B C' where C is the lower
  2553. # triangular Cholesky factorization of the scale matrix.
  2554. # this appears to conflict with the instructions in [1]_, which
  2555. # suggest that it should be D' B D where D is the lower
  2556. # triangular factorization of the scale matrix. However, it is
  2557. # meant to refer to the Bartlett (1933) representation of a
  2558. # Wishart random variate as L A A' L' where L is lower triangular
  2559. # so it appears that understanding D' to be upper triangular
  2560. # is either a typo in or misreading of [1]_.
  2561. for index in np.ndindex(shape):
  2562. CA = np.dot(C, A[index])
  2563. A[index] = np.dot(CA, CA.T)
  2564. return A
  2565. def rvs(self, df, scale, size=1, random_state=None):
  2566. """Draw random samples from a Wishart distribution.
  2567. Parameters
  2568. ----------
  2569. %(_doc_default_callparams)s
  2570. size : integer or iterable of integers, optional
  2571. Number of samples to draw (default 1).
  2572. %(_doc_random_state)s
  2573. Returns
  2574. -------
  2575. rvs : ndarray
  2576. Random variates of shape (`size`) + (``dim``, ``dim``), where
  2577. ``dim`` is the dimension of the scale matrix.
  2578. Notes
  2579. -----
  2580. %(_doc_callparams_note)s
  2581. """
  2582. n, shape = self._process_size(size)
  2583. dim, df, scale = self._process_parameters(df, scale)
  2584. # Cholesky decomposition of scale
  2585. C = scipy.linalg.cholesky(scale, lower=True)
  2586. out = self._rvs(n, shape, dim, df, C, random_state)
  2587. return _squeeze_output(out)
  2588. def _entropy(self, dim, df, log_det_scale):
  2589. """Compute the differential entropy of the Wishart.
  2590. Parameters
  2591. ----------
  2592. dim : int
  2593. Dimension of the scale matrix
  2594. df : int
  2595. Degrees of freedom
  2596. log_det_scale : float
  2597. Logarithm of the determinant of the scale matrix
  2598. Notes
  2599. -----
  2600. As this function does no argument checking, it should not be
  2601. called directly; use 'entropy' instead.
  2602. """
  2603. return (
  2604. 0.5 * (dim+1) * log_det_scale +
  2605. 0.5 * dim * (dim+1) * _LOG_2 +
  2606. multigammaln(0.5*df, dim) -
  2607. 0.5 * (df - dim - 1) * np.sum(
  2608. [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]
  2609. ) +
  2610. 0.5 * df * dim
  2611. )
  2612. def entropy(self, df, scale):
  2613. """Compute the differential entropy of the Wishart.
  2614. Parameters
  2615. ----------
  2616. %(_doc_default_callparams)s
  2617. Returns
  2618. -------
  2619. h : scalar
  2620. Entropy of the Wishart distribution
  2621. Notes
  2622. -----
  2623. %(_doc_callparams_note)s
  2624. """
  2625. dim, df, scale = self._process_parameters(df, scale)
  2626. _, log_det_scale = self._cholesky_logdet(scale)
  2627. return self._entropy(dim, df, log_det_scale)
  2628. def _cholesky_logdet(self, scale):
  2629. """Compute Cholesky decomposition and determine (log(det(scale)).
  2630. Parameters
  2631. ----------
  2632. scale : ndarray
  2633. Scale matrix.
  2634. Returns
  2635. -------
  2636. c_decomp : ndarray
  2637. The Cholesky decomposition of `scale`.
  2638. logdet : scalar
  2639. The log of the determinant of `scale`.
  2640. Notes
  2641. -----
  2642. This computation of ``logdet`` is equivalent to
  2643. ``np.linalg.slogdet(scale)``. It is ~2x faster though.
  2644. """
  2645. c_decomp = scipy.linalg.cholesky(scale, lower=True)
  2646. logdet = 2 * np.sum(np.log(c_decomp.diagonal()))
  2647. return c_decomp, logdet
# Module-level Wishart distribution object, created with the default seed
# (None); calling it with `df` and `scale` returns a `wishart_frozen`.
wishart = wishart_gen()
  2649. class wishart_frozen(multi_rv_frozen):
  2650. """Create a frozen Wishart distribution.
  2651. Parameters
  2652. ----------
  2653. df : array_like
  2654. Degrees of freedom of the distribution
  2655. scale : array_like
  2656. Scale matrix of the distribution
  2657. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2658. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2659. singleton is used.
  2660. If `seed` is an int, a new ``RandomState`` instance is used,
  2661. seeded with `seed`.
  2662. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  2663. that instance is used.
  2664. """
  2665. __class_getitem__ = None
  2666. def __init__(self, df, scale, seed=None):
  2667. self._dist = wishart_gen(seed)
  2668. self.dim, self.df, self.scale = self._dist._process_parameters(
  2669. df, scale)
  2670. self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)
  2671. def logpdf(self, x):
  2672. x = self._dist._process_quantiles(x, self.dim)
  2673. out = self._dist._logpdf(x, self.dim, self.df, self.scale,
  2674. self.log_det_scale, self.C)
  2675. return _squeeze_output(out)
  2676. def pdf(self, x):
  2677. return np.exp(self.logpdf(x))
  2678. def mean(self):
  2679. out = self._dist._mean(self.dim, self.df, self.scale)
  2680. return _squeeze_output(out)
  2681. def mode(self):
  2682. out = self._dist._mode(self.dim, self.df, self.scale)
  2683. return _squeeze_output(out) if out is not None else out
  2684. def var(self):
  2685. out = self._dist._var(self.dim, self.df, self.scale)
  2686. return _squeeze_output(out)
  2687. def rvs(self, size=1, random_state=None):
  2688. n, shape = self._dist._process_size(size)
  2689. out = self._dist._rvs(n, shape, self.dim, self.df,
  2690. self.C, random_state)
  2691. return _squeeze_output(out)
  2692. def entropy(self):
  2693. return self._dist._entropy(self.dim, self.df, self.log_det_scale)
# Set frozen generator docstrings from corresponding docstrings in
# Wishart and fill in default strings in class docstrings.
#
# Statement order inside the loop matters: the frozen method's docstring
# must be rendered from the still-unformatted template (placeholders
# intact) *before* `method.__doc__` is overwritten with its own rendered
# version.
for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:
    method = wishart_gen.__dict__[name]
    method_frozen = wishart_frozen.__dict__[name]
    # Frozen variant: parameters are fixed, so use the "noparams" texts.
    method_frozen.__doc__ = doccer.docformat(
        method.__doc__, wishart_docdict_noparams)
    # Generic variant: fill in the full parameter descriptions.
    method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
  2702. class invwishart_gen(wishart_gen):
  2703. r"""An inverse Wishart random variable.
  2704. The `df` keyword specifies the degrees of freedom. The `scale` keyword
  2705. specifies the scale matrix, which must be symmetric and positive definite.
  2706. In this context, the scale matrix is often interpreted in terms of a
  2707. multivariate normal covariance matrix.
  2708. Methods
  2709. -------
  2710. pdf(x, df, scale)
  2711. Probability density function.
  2712. logpdf(x, df, scale)
  2713. Log of the probability density function.
  2714. rvs(df, scale, size=1, random_state=None)
  2715. Draw random samples from an inverse Wishart distribution.
  2716. entropy(df, scale)
  2717. Differential entropy of the distribution.
  2718. Parameters
  2719. ----------
  2720. %(_doc_default_callparams)s
  2721. %(_doc_random_state)s
  2722. Raises
  2723. ------
  2724. scipy.linalg.LinAlgError
  2725. If the scale matrix `scale` is not positive definite.
  2726. See Also
  2727. --------
  2728. wishart
  2729. Notes
  2730. -----
  2731. %(_doc_callparams_note)s
  2732. The scale matrix `scale` must be a symmetric positive definite
  2733. matrix. Singular matrices, including the symmetric positive semi-definite
  2734. case, are not supported. Symmetry is not checked; only the lower triangular
  2735. portion is used.
  2736. The inverse Wishart distribution is often denoted
  2737. .. math::
  2738. W_p^{-1}(\nu, \Psi)
  2739. where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the
  2740. :math:`p \times p` scale matrix.
  2741. The probability density function for `invwishart` has support over positive
  2742. definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,
  2743. then its PDF is given by:
  2744. .. math::
  2745. f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }
  2746. |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}
  2747. \exp\left( -tr(\Sigma S^{-1}) / 2 \right)
  2748. If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then
  2749. :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).
  2750. If the scale matrix is 1-dimensional and equal to one, then the inverse
  2751. Wishart distribution :math:`W_1(\nu, 1)` collapses to the
  2752. inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`
  2753. and scale = :math:`\frac{1}{2}`.
  2754. Instead of inverting a randomly generated Wishart matrix as described in [2],
  2755. here the algorithm in [4] is used to directly generate a random inverse-Wishart
  2756. matrix without inversion.
  2757. .. versionadded:: 0.16.0
  2758. References
  2759. ----------
  2760. .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
  2761. Wiley, 1983.
  2762. .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications
  2763. in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,
  2764. 1985.
  2765. .. [3] Gupta, M. and Srivastava, S. "Parametric Bayesian Estimation of
  2766. Differential Entropy and Relative Entropy". Entropy 12, 818 - 843.
  2767. 2010.
  2768. .. [4] S.D. Axen, "Efficiently generating inverse-Wishart matrices and
  2769. their Cholesky factors", :arXiv:`2310.15884v1`. 2023.
  2770. Examples
  2771. --------
  2772. >>> import numpy as np
  2773. >>> import matplotlib.pyplot as plt
  2774. >>> from scipy.stats import invwishart, invgamma
  2775. >>> x = np.linspace(0.01, 1, 100)
  2776. >>> iw = invwishart.pdf(x, df=6, scale=1)
  2777. >>> iw[:3]
  2778. array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
  2779. >>> ig = invgamma.pdf(x, 6/2., scale=1./2)
  2780. >>> ig[:3]
  2781. array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
  2782. >>> plt.plot(x, iw)
  2783. >>> plt.show()
  2784. The input quantiles can be any shape of array, as long as the last
  2785. axis labels the components.
  2786. Alternatively, the object may be called (as a function) to fix the degrees
  2787. of freedom and scale parameters, returning a "frozen" inverse Wishart
  2788. random variable:
  2789. >>> rv = invwishart(df=1, scale=1)
  2790. >>> # Frozen object with the same methods but holding the given
  2791. >>> # degrees of freedom and scale fixed.
  2792. """
  2793. def __init__(self, seed=None):
  2794. super().__init__(seed)
  2795. self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
  2796. def __call__(self, df=None, scale=None, seed=None):
  2797. """Create a frozen inverse Wishart distribution.
  2798. See `invwishart_frozen` for more information.
  2799. """
  2800. return invwishart_frozen(df, scale, seed)
  2801. def _logpdf(self, x, dim, df, log_det_scale, C):
  2802. """Log of the inverse Wishart probability density function.
  2803. Parameters
  2804. ----------
  2805. x : ndarray
  2806. Points at which to evaluate the log of the probability
  2807. density function.
  2808. dim : int
  2809. Dimension of the scale matrix
  2810. df : int
  2811. Degrees of freedom
  2812. log_det_scale : float
  2813. Logarithm of the determinant of the scale matrix
  2814. C : ndarray
  2815. Cholesky factorization of the scale matrix, lower triangular.
  2816. Notes
  2817. -----
  2818. As this function does no argument checking, it should not be
  2819. called directly; use 'logpdf' instead.
  2820. """
  2821. # Retrieve tr(scale x^{-1})
  2822. log_det_x = np.empty(x.shape[-1])
  2823. tr_scale_x_inv = np.empty(x.shape[-1])
  2824. trsm = get_blas_funcs(('trsm'), (x,))
  2825. if dim > 1:
  2826. for i in range(x.shape[-1]):
  2827. Cx, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
  2828. A = trsm(1., Cx, C, side=0, lower=True)
  2829. tr_scale_x_inv[i] = np.linalg.norm(A)**2
  2830. else:
  2831. log_det_x[:] = np.log(x[0, 0])
  2832. tr_scale_x_inv[:] = C[0, 0]**2 / x[0, 0]
  2833. # Log PDF
  2834. out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
  2835. (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
  2836. multigammaln(0.5*df, dim))
  2837. return out
  2838. def logpdf(self, x, df, scale):
  2839. """Log of the inverse Wishart probability density function.
  2840. Parameters
  2841. ----------
  2842. x : array_like
  2843. Quantiles, with the last axis of `x` denoting the components.
  2844. Each quantile must be a symmetric positive definite matrix.
  2845. %(_doc_default_callparams)s
  2846. Returns
  2847. -------
  2848. pdf : ndarray
  2849. Log of the probability density function evaluated at `x`
  2850. Notes
  2851. -----
  2852. %(_doc_callparams_note)s
  2853. """
  2854. dim, df, scale = self._process_parameters(df, scale)
  2855. x = self._process_quantiles(x, dim)
  2856. C, log_det_scale = self._cholesky_logdet(scale)
  2857. out = self._logpdf(x, dim, df, log_det_scale, C)
  2858. return _squeeze_output(out)
  2859. def pdf(self, x, df, scale):
  2860. """Inverse Wishart probability density function.
  2861. Parameters
  2862. ----------
  2863. x : array_like
  2864. Quantiles, with the last axis of `x` denoting the components.
  2865. Each quantile must be a symmetric positive definite matrix.
  2866. %(_doc_default_callparams)s
  2867. Returns
  2868. -------
  2869. pdf : ndarray
  2870. Probability density function evaluated at `x`
  2871. Notes
  2872. -----
  2873. %(_doc_callparams_note)s
  2874. """
  2875. return np.exp(self.logpdf(x, df, scale))
  2876. def _mean(self, dim, df, scale):
  2877. """Mean of the inverse Wishart distribution.
  2878. Parameters
  2879. ----------
  2880. dim : int
  2881. Dimension of the scale matrix
  2882. %(_doc_default_callparams)s
  2883. Notes
  2884. -----
  2885. As this function does no argument checking, it should not be
  2886. called directly; use 'mean' instead.
  2887. """
  2888. if df > dim + 1:
  2889. out = scale / (df - dim - 1)
  2890. else:
  2891. out = None
  2892. return out
  2893. def mean(self, df, scale):
  2894. """Mean of the inverse Wishart distribution.
  2895. Only valid if the degrees of freedom are greater than the dimension of
  2896. the scale matrix plus one.
  2897. Parameters
  2898. ----------
  2899. %(_doc_default_callparams)s
  2900. Returns
  2901. -------
  2902. mean : float or None
  2903. The mean of the distribution
  2904. """
  2905. dim, df, scale = self._process_parameters(df, scale)
  2906. out = self._mean(dim, df, scale)
  2907. return _squeeze_output(out) if out is not None else out
  2908. def _mode(self, dim, df, scale):
  2909. """Mode of the inverse Wishart distribution.
  2910. Parameters
  2911. ----------
  2912. dim : int
  2913. Dimension of the scale matrix
  2914. %(_doc_default_callparams)s
  2915. Notes
  2916. -----
  2917. As this function does no argument checking, it should not be
  2918. called directly; use 'mode' instead.
  2919. """
  2920. return scale / (df + dim + 1)
  2921. def mode(self, df, scale):
  2922. """Mode of the inverse Wishart distribution.
  2923. Parameters
  2924. ----------
  2925. %(_doc_default_callparams)s
  2926. Returns
  2927. -------
  2928. mode : float
  2929. The Mode of the distribution
  2930. """
  2931. dim, df, scale = self._process_parameters(df, scale)
  2932. out = self._mode(dim, df, scale)
  2933. return _squeeze_output(out)
  2934. def _var(self, dim, df, scale):
  2935. """Variance of the inverse Wishart distribution.
  2936. Parameters
  2937. ----------
  2938. dim : int
  2939. Dimension of the scale matrix
  2940. %(_doc_default_callparams)s
  2941. Notes
  2942. -----
  2943. As this function does no argument checking, it should not be
  2944. called directly; use 'var' instead.
  2945. """
  2946. if df > dim + 3:
  2947. var = (df - dim + 1) * scale**2
  2948. diag = scale.diagonal() # 1 x dim array
  2949. var += (df - dim - 1) * np.outer(diag, diag)
  2950. var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)
  2951. else:
  2952. var = None
  2953. return var
  2954. def var(self, df, scale):
  2955. """Variance of the inverse Wishart distribution.
  2956. Only valid if the degrees of freedom are greater than the dimension of
  2957. the scale matrix plus three.
  2958. Parameters
  2959. ----------
  2960. %(_doc_default_callparams)s
  2961. Returns
  2962. -------
  2963. var : float
  2964. The variance of the distribution
  2965. """
  2966. dim, df, scale = self._process_parameters(df, scale)
  2967. out = self._var(dim, df, scale)
  2968. return _squeeze_output(out) if out is not None else out
  2969. def _inv_standard_rvs(self, n, shape, dim, df, random_state):
  2970. """
  2971. Parameters
  2972. ----------
  2973. n : integer
  2974. Number of variates to generate
  2975. shape : iterable
  2976. Shape of the variates to generate
  2977. dim : int
  2978. Dimension of the scale matrix
  2979. df : int
  2980. Degrees of freedom
  2981. random_state : {None, int, `numpy.random.Generator`,
  2982. `numpy.random.RandomState`}, optional
  2983. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2984. singleton is used.
  2985. If `seed` is an int, a new ``RandomState`` instance is used,
  2986. seeded with `seed`.
  2987. If `seed` is already a ``Generator`` or ``RandomState`` instance
  2988. then that instance is used.
  2989. Returns
  2990. -------
  2991. A : ndarray
  2992. Random variates of shape (`shape`) + (``dim``, ``dim``).
  2993. Each slice `A[..., :, :]` is lower-triangular, and its
  2994. inverse is the lower Cholesky factor of a draw from
  2995. `invwishart(df, np.eye(dim))`.
  2996. Notes
  2997. -----
  2998. As this function does no argument checking, it should not be
  2999. called directly; use 'rvs' instead.
  3000. """
  3001. A = np.zeros(shape + (dim, dim))
  3002. # Random normal variates for off-diagonal elements
  3003. tri_rows, tri_cols = np.tril_indices(dim, k=-1)
  3004. n_tril = dim * (dim-1) // 2
  3005. A[..., tri_rows, tri_cols] = random_state.normal(
  3006. size=(*shape, n_tril),
  3007. )
  3008. # Random chi variates for diagonal elements
  3009. rows = np.arange(dim)
  3010. chi_dfs = (df - dim + 1) + rows
  3011. A[..., rows, rows] = random_state.chisquare(
  3012. df=chi_dfs, size=(*shape, dim),
  3013. )**0.5
  3014. return A
  3015. def _rvs(self, n, shape, dim, df, C, random_state):
  3016. """Draw random samples from an inverse Wishart distribution.
  3017. Parameters
  3018. ----------
  3019. n : integer
  3020. Number of variates to generate
  3021. shape : iterable
  3022. Shape of the variates to generate
  3023. dim : int
  3024. Dimension of the scale matrix
  3025. df : int
  3026. Degrees of freedom
  3027. C : ndarray
  3028. Cholesky factorization of the scale matrix, lower triangular.
  3029. %(_doc_random_state)s
  3030. Notes
  3031. -----
  3032. As this function does no argument checking, it should not be
  3033. called directly; use 'rvs' instead.
  3034. """
  3035. random_state = self._get_random_state(random_state)
  3036. # Get random draws A such that inv(A) ~ iW(df, I)
  3037. A = self._inv_standard_rvs(n, shape, dim, df, random_state)
  3038. # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
  3039. trsm = get_blas_funcs(('trsm'), (A,))
  3040. trmm = get_blas_funcs(('trmm'), (A,))
  3041. for index in np.ndindex(A.shape[:-2]):
  3042. if dim > 1:
  3043. # Calculate CA
  3044. # Get CA = C A^{-1} via triangular solver
  3045. CA = trsm(1., A[index], C, side=1, lower=True)
  3046. # get SA
  3047. A[index] = trmm(1., CA, CA, side=1, lower=True, trans_a=True)
  3048. else:
  3049. A[index][0, 0] = (C[0, 0] / A[index][0, 0])**2
  3050. return A
  3051. def rvs(self, df, scale, size=1, random_state=None):
  3052. """Draw random samples from an inverse Wishart distribution.
  3053. Parameters
  3054. ----------
  3055. %(_doc_default_callparams)s
  3056. size : integer or iterable of integers, optional
  3057. Number of samples to draw (default 1).
  3058. %(_doc_random_state)s
  3059. Returns
  3060. -------
  3061. rvs : ndarray
  3062. Random variates of shape (`size`) + (``dim``, ``dim``), where
  3063. ``dim`` is the dimension of the scale matrix.
  3064. Notes
  3065. -----
  3066. %(_doc_callparams_note)s
  3067. """
  3068. n, shape = self._process_size(size)
  3069. dim, df, scale = self._process_parameters(df, scale)
  3070. # Cholesky decomposition of scale
  3071. C = scipy.linalg.cholesky(scale, lower=True)
  3072. out = self._rvs(n, shape, dim, df, C, random_state)
  3073. return _squeeze_output(out)
  3074. def _entropy(self, dim, df, log_det_scale):
  3075. # reference: eq. (17) from ref. 3
  3076. psi_eval_points = [0.5 * (df - dim + i) for i in range(1, dim + 1)]
  3077. psi_eval_points = np.asarray(psi_eval_points)
  3078. return multigammaln(0.5 * df, dim) + 0.5 * dim * df + \
  3079. 0.5 * (dim + 1) * (log_det_scale - _LOG_2) - \
  3080. 0.5 * (df + dim + 1) * \
  3081. psi(psi_eval_points, out=psi_eval_points).sum()
  3082. def entropy(self, df, scale):
  3083. dim, df, scale = self._process_parameters(df, scale)
  3084. _, log_det_scale = self._cholesky_logdet(scale)
  3085. return self._entropy(dim, df, log_det_scale)
# Module-level inverse Wishart distribution object, created with the default
# seed (None); calling it with `df` and `scale` returns an
# `invwishart_frozen`.
invwishart = invwishart_gen()
  3087. class invwishart_frozen(multi_rv_frozen):
  3088. __class_getitem__ = None
  3089. def __init__(self, df, scale, seed=None):
  3090. """Create a frozen inverse Wishart distribution.
  3091. Parameters
  3092. ----------
  3093. df : array_like
  3094. Degrees of freedom of the distribution
  3095. scale : array_like
  3096. Scale matrix of the distribution
  3097. seed : {None, int, `numpy.random.Generator`}, optional
  3098. If `seed` is None the `numpy.random.Generator` singleton is used.
  3099. If `seed` is an int, a new ``Generator`` instance is used,
  3100. seeded with `seed`.
  3101. If `seed` is already a ``Generator`` instance then that instance is
  3102. used.
  3103. """
  3104. self._dist = invwishart_gen(seed)
  3105. self.dim, self.df, self.scale = self._dist._process_parameters(
  3106. df, scale
  3107. )
  3108. # Get the determinant via Cholesky factorization
  3109. self.C = scipy.linalg.cholesky(self.scale, lower=True)
  3110. self.log_det_scale = 2 * np.sum(np.log(self.C.diagonal()))
  3111. def logpdf(self, x):
  3112. x = self._dist._process_quantiles(x, self.dim)
  3113. out = self._dist._logpdf(x, self.dim, self.df,
  3114. self.log_det_scale, self.C)
  3115. return _squeeze_output(out)
  3116. def pdf(self, x):
  3117. return np.exp(self.logpdf(x))
  3118. def mean(self):
  3119. out = self._dist._mean(self.dim, self.df, self.scale)
  3120. return _squeeze_output(out) if out is not None else out
  3121. def mode(self):
  3122. out = self._dist._mode(self.dim, self.df, self.scale)
  3123. return _squeeze_output(out)
  3124. def var(self):
  3125. out = self._dist._var(self.dim, self.df, self.scale)
  3126. return _squeeze_output(out) if out is not None else out
  3127. def rvs(self, size=1, random_state=None):
  3128. n, shape = self._dist._process_size(size)
  3129. out = self._dist._rvs(n, shape, self.dim, self.df,
  3130. self.C, random_state)
  3131. return _squeeze_output(out)
  3132. def entropy(self):
  3133. return self._dist._entropy(self.dim, self.df, self.log_det_scale)
  3134. # Set frozen generator docstrings from corresponding docstrings in
  3135. # inverse Wishart and fill in default strings in class docstrings
  3136. for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:
  3137. method = invwishart_gen.__dict__[name]
  3138. method_frozen = wishart_frozen.__dict__[name]
  3139. method_frozen.__doc__ = doccer.docformat(
  3140. method.__doc__, wishart_docdict_noparams)
  3141. method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
# Docstring fragments for the multinomial distribution. They are collected
# in the two dictionaries below under the same placeholder keys
# (`_doc_default_callparams`, `_doc_callparams_note`, `_doc_random_state`)
# that appear as %(...)s in the multinomial docstrings.

# Parameter descriptions for the non-frozen methods.
_multinomial_doc_default_callparams = """\
n : int
    Number of trials
p : array_like
    Probability of a trial falling into each category; should sum to 1
"""

# Note on parameter constraints for the non-frozen methods.
_multinomial_doc_callparams_note = """\
`n` should be a nonnegative integer. Each element of `p` should be in the
interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to
1, the last element of the `p` array is not used and is replaced with the
remaining probability left over from the earlier elements.
"""

# Frozen variants: the parameter list is empty and the note only points to
# the class definition.
_multinomial_doc_frozen_callparams = ""

_multinomial_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitutions that carry the full parameter documentation.
multinomial_docdict_params = {
    '_doc_default_callparams': _multinomial_doc_default_callparams,
    '_doc_callparams_note': _multinomial_doc_callparams_note,
    '_doc_random_state': _doc_random_state
}

# Substitutions that carry the frozen (parameter-free) documentation.
multinomial_docdict_noparams = {
    '_doc_default_callparams': _multinomial_doc_frozen_callparams,
    '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,
    '_doc_random_state': _doc_random_state
}
  3167. class multinomial_gen(multi_rv_generic):
  3168. r"""A multinomial random variable.
  3169. Methods
  3170. -------
  3171. pmf(x, n, p)
  3172. Probability mass function.
  3173. logpmf(x, n, p)
  3174. Log of the probability mass function.
  3175. rvs(n, p, size=1, random_state=None)
  3176. Draw random samples from a multinomial distribution.
  3177. entropy(n, p)
  3178. Compute the entropy of the multinomial distribution.
  3179. cov(n, p)
  3180. Compute the covariance matrix of the multinomial distribution.
  3181. Parameters
  3182. ----------
  3183. %(_doc_default_callparams)s
  3184. %(_doc_random_state)s
  3185. Notes
  3186. -----
  3187. %(_doc_callparams_note)s
  3188. The probability mass function for `multinomial` is
  3189. .. math::
  3190. f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},
  3191. supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a
  3192. nonnegative integer and their sum is :math:`n`.
  3193. .. versionadded:: 0.19.0
  3194. Examples
  3195. --------
  3196. >>> from scipy.stats import multinomial
  3197. >>> rv = multinomial(8, [0.3, 0.2, 0.5])
  3198. >>> rv.pmf([1, 3, 4])
  3199. 0.042000000000000072
  3200. The multinomial distribution for :math:`k=2` is identical to the
  3201. corresponding binomial distribution (tiny numerical differences
  3202. notwithstanding):
  3203. >>> from scipy.stats import binom
  3204. >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])
  3205. 0.29030399999999973
  3206. >>> binom.pmf(3, 7, 0.4)
  3207. 0.29030400000000012
  3208. The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support
  3209. broadcasting, under the convention that the vector parameters (``x`` and
  3210. ``p``) are interpreted as if each row along the last axis is a single
  3211. object. For instance:
  3212. >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
  3213. array([0.2268945, 0.25412184])
  3214. Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,
  3215. but following the rules mentioned above they behave as if the rows
  3216. ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single
  3217. object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and
  3218. ``p.shape = ()``. To obtain the individual elements without broadcasting,
  3219. we would do this:
  3220. >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])
  3221. 0.2268945
  3222. >>> multinomial.pmf([3, 5], 8, p=[.3, .7])
  3223. 0.25412184
  3224. This broadcasting also works for ``cov``, where the output objects are
  3225. square matrices of size ``p.shape[-1]``. For example:
  3226. >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
  3227. array([[[ 0.84, -0.84],
  3228. [-0.84, 0.84]],
  3229. [[ 1.2 , -1.2 ],
  3230. [-1.2 , 1.2 ]]])
  3231. In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and
  3232. following the rules above, these broadcast as if ``p.shape == (2,)``.
  3233. Thus the result should also be of shape ``(2,)``, but since each output is
  3234. a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,
  3235. where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and
  3236. ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.
  3237. Alternatively, the object may be called (as a function) to fix the `n` and
  3238. `p` parameters, returning a "frozen" multinomial random variable:
  3239. >>> rv = multinomial(n=7, p=[.3, .7])
  3240. >>> # Frozen object with the same methods but holding the given
  3241. >>> # degrees of freedom and scale fixed.
  3242. See also
  3243. --------
  3244. scipy.stats.binom : The binomial distribution.
  3245. numpy.random.Generator.multinomial : Sampling from the multinomial distribution.
  3246. scipy.stats.multivariate_hypergeom :
  3247. The multivariate hypergeometric distribution.
  3248. """
  3249. def __init__(self, seed=None):
  3250. super().__init__(seed)
  3251. self.__doc__ = \
  3252. doccer.docformat(self.__doc__, multinomial_docdict_params)
  3253. def __call__(self, n, p, seed=None):
  3254. """Create a frozen multinomial distribution.
  3255. See `multinomial_frozen` for more information.
  3256. """
  3257. return multinomial_frozen(n, p, seed)
  3258. def _process_parameters(self, n, p):
  3259. """Returns: n_, p_, npcond.
  3260. n_ and p_ are arrays of the correct shape; npcond is a boolean array
  3261. flagging values out of the domain.
  3262. """
  3263. eps = np.finfo(np.result_type(np.asarray(p), np.float32)).eps * 10
  3264. p = np.array(p, dtype=np.float64, copy=True)
  3265. p_adjusted = 1. - p[..., :-1].sum(axis=-1)
  3266. # only make adjustment when it's significant
  3267. i_adjusted = np.abs(1 - p.sum(axis=-1)) > eps
  3268. p[i_adjusted, -1] = p_adjusted[i_adjusted]
  3269. if np.any(i_adjusted):
  3270. message = ("Some rows of `p` do not sum to 1.0 within tolerance of "
  3271. f"{eps=}. Currently, the last element of these rows is adjusted "
  3272. "to compensate, but this condition will produce NaNs "
  3273. "beginning in SciPy 1.18.0. Please ensure that rows of `p` sum "
  3274. "to 1.0 to avoid futher disruption.")
  3275. warnings.warn(message, FutureWarning, stacklevel=3)
  3276. # true for bad p
  3277. pcond = np.any(p < 0, axis=-1)
  3278. pcond |= np.any(p > 1, axis=-1)
  3279. n = np.array(n, dtype=int, copy=True)
  3280. # true for bad n
  3281. ncond = n < 0
  3282. return n, p, ncond | pcond
  3283. def _process_quantiles(self, x, n, p):
  3284. """Returns: x_, xcond.
  3285. x_ is an int array; xcond is a boolean array flagging values out of the
  3286. domain.
  3287. """
  3288. xx = np.asarray(x, dtype=int)
  3289. if xx.ndim == 0:
  3290. raise ValueError("x must be an array.")
  3291. if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:
  3292. raise ValueError(f"Size of each quantile should be size of p: "
  3293. f"received {xx.shape[-1]}, but expected "
  3294. f"{p.shape[-1]}.")
  3295. # true for x out of the domain
  3296. cond = np.any(xx != x, axis=-1)
  3297. cond |= np.any(xx < 0, axis=-1)
  3298. cond = cond | (np.sum(xx, axis=-1) != n)
  3299. return xx, cond
  3300. def _checkresult(self, result, cond, bad_value):
  3301. result = np.asarray(result)
  3302. if cond.ndim != 0:
  3303. result[cond] = bad_value
  3304. elif cond:
  3305. if result.ndim == 0:
  3306. return bad_value
  3307. result[...] = bad_value
  3308. return result
  3309. def _logpmf(self, x, n, p):
  3310. return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)
  3311. def logpmf(self, x, n, p):
  3312. """Log of the Multinomial probability mass function.
  3313. Parameters
  3314. ----------
  3315. x : array_like
  3316. Quantiles, with the last axis of `x` denoting the components.
  3317. %(_doc_default_callparams)s
  3318. Returns
  3319. -------
  3320. logpmf : ndarray or scalar
  3321. Log of the probability mass function evaluated at `x`
  3322. Notes
  3323. -----
  3324. %(_doc_callparams_note)s
  3325. """
  3326. n, p, npcond = self._process_parameters(n, p)
  3327. x, xcond = self._process_quantiles(x, n, p)
  3328. result = self._logpmf(x, n, p)
  3329. # replace values for which x was out of the domain; broadcast
  3330. # xcond to the right shape
  3331. xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)
  3332. result = self._checkresult(result, xcond_, -np.inf)
  3333. # replace values bad for n or p; broadcast npcond to the right shape
  3334. npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)
  3335. return self._checkresult(result, npcond_, np.nan)
  3336. def pmf(self, x, n, p):
  3337. """Multinomial probability mass function.
  3338. Parameters
  3339. ----------
  3340. x : array_like
  3341. Quantiles, with the last axis of `x` denoting the components.
  3342. %(_doc_default_callparams)s
  3343. Returns
  3344. -------
  3345. pmf : ndarray or scalar
  3346. Probability density function evaluated at `x`
  3347. Notes
  3348. -----
  3349. %(_doc_callparams_note)s
  3350. """
  3351. return np.exp(self.logpmf(x, n, p))
  3352. def mean(self, n, p):
  3353. """Mean of the Multinomial distribution.
  3354. Parameters
  3355. ----------
  3356. %(_doc_default_callparams)s
  3357. Returns
  3358. -------
  3359. mean : float
  3360. The mean of the distribution
  3361. """
  3362. n, p, npcond = self._process_parameters(n, p)
  3363. result = n[..., np.newaxis]*p
  3364. return self._checkresult(result, npcond, np.nan)
  3365. def cov(self, n, p):
  3366. """Covariance matrix of the multinomial distribution.
  3367. Parameters
  3368. ----------
  3369. %(_doc_default_callparams)s
  3370. Returns
  3371. -------
  3372. cov : ndarray
  3373. The covariance matrix of the distribution
  3374. """
  3375. n, p, npcond = self._process_parameters(n, p)
  3376. nn = n[..., np.newaxis, np.newaxis]
  3377. result = nn * np.einsum('...j,...k->...jk', -p, p)
  3378. # change the diagonal
  3379. for i in range(p.shape[-1]):
  3380. result[..., i, i] += n*p[..., i]
  3381. return self._checkresult(result, npcond, np.nan)
  3382. def entropy(self, n, p):
  3383. r"""Compute the entropy of the multinomial distribution.
  3384. The entropy is computed using this expression:
  3385. .. math::
  3386. f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +
  3387. \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!
  3388. Parameters
  3389. ----------
  3390. %(_doc_default_callparams)s
  3391. Returns
  3392. -------
  3393. h : scalar
  3394. Entropy of the Multinomial distribution
  3395. Notes
  3396. -----
  3397. %(_doc_callparams_note)s
  3398. """
  3399. n, p, npcond = self._process_parameters(n, p)
  3400. x = np.r_[1:np.max(n)+1]
  3401. term1 = n*np.sum(entr(p), axis=-1)
  3402. term1 -= gammaln(n+1)
  3403. n = n[..., np.newaxis]
  3404. new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1
  3405. new_shape = x.shape + (1,)*new_axes_needed
  3406. x = x.reshape(new_shape)
  3407. term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),
  3408. axis=(-1, -1-new_axes_needed))
  3409. return self._checkresult(term1 + term2, npcond, np.nan)
  3410. def rvs(self, n, p, size=None, random_state=None):
  3411. """Draw random samples from a Multinomial distribution.
  3412. Parameters
  3413. ----------
  3414. %(_doc_default_callparams)s
  3415. size : integer or iterable of integers, optional
  3416. Number of samples to draw (default 1).
  3417. %(_doc_random_state)s
  3418. Returns
  3419. -------
  3420. rvs : ndarray or scalar
  3421. Random variates of shape (`size`, `len(p)`)
  3422. Notes
  3423. -----
  3424. %(_doc_callparams_note)s
  3425. """
  3426. n, p, npcond = self._process_parameters(n, p)
  3427. random_state = self._get_random_state(random_state)
  3428. return random_state.multinomial(n, p, size)
  3429. multinomial = multinomial_gen()
  3430. class multinomial_frozen(multi_rv_frozen):
  3431. r"""Create a frozen Multinomial distribution.
  3432. Parameters
  3433. ----------
  3434. n : int
  3435. number of trials
  3436. p: array_like
  3437. probability of a trial falling into each category; should sum to 1
  3438. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3439. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3440. singleton is used.
  3441. If `seed` is an int, a new ``RandomState`` instance is used,
  3442. seeded with `seed`.
  3443. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  3444. that instance is used.
  3445. """
  3446. def __init__(self, n, p, seed=None):
  3447. self._dist = multinomial_gen(seed)
  3448. self.n, self.p, self.npcond = self._dist._process_parameters(n, p)
  3449. # monkey patch self._dist
  3450. def _process_parameters(n, p):
  3451. return self.n, self.p, self.npcond
  3452. self._dist._process_parameters = _process_parameters
  3453. def logpmf(self, x):
  3454. return self._dist.logpmf(x, self.n, self.p)
  3455. def pmf(self, x):
  3456. return self._dist.pmf(x, self.n, self.p)
  3457. def mean(self):
  3458. return self._dist.mean(self.n, self.p)
  3459. def cov(self):
  3460. return self._dist.cov(self.n, self.p)
  3461. def entropy(self):
  3462. return self._dist.entropy(self.n, self.p)
  3463. def rvs(self, size=1, random_state=None):
  3464. return self._dist.rvs(self.n, self.p, size, random_state)
  3465. # Set frozen generator docstrings from corresponding docstrings in
  3466. # multinomial and fill in default strings in class docstrings
  3467. for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:
  3468. method = multinomial_gen.__dict__[name]
  3469. method_frozen = multinomial_frozen.__dict__[name]
  3470. method_frozen.__doc__ = doccer.docformat(
  3471. method.__doc__, multinomial_docdict_noparams)
  3472. method.__doc__ = doccer.docformat(method.__doc__,
  3473. multinomial_docdict_params)
  3474. class special_ortho_group_gen(multi_rv_generic):
  3475. r"""A Special Orthogonal matrix (SO(N)) random variable.
  3476. Return a random rotation matrix, drawn from the Haar distribution
  3477. (the only uniform distribution on SO(N)) with a determinant of +1.
  3478. The `dim` keyword specifies the dimension N.
  3479. Methods
  3480. -------
  3481. rvs(dim=None, size=1, random_state=None)
  3482. Draw random samples from SO(N).
  3483. Parameters
  3484. ----------
  3485. dim : scalar
  3486. Dimension of matrices
  3487. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  3488. Used for drawing random variates.
  3489. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  3490. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  3491. with seed.
  3492. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  3493. then that object is used.
  3494. Default is `None`.
  3495. Notes
  3496. -----
  3497. The ``rvs`` method returns a random rotation matrix drawn from the Haar
  3498. distribution, the only uniform distribution on SO(N). The algorithm generates
  3499. a Haar-distributed orthogonal matrix in O(N) using the ``rvs`` method of
  3500. `ortho_group`, then adjusts the matrix to ensure that the determinant is +1.
  3501. For a random rotation in three dimensions, see
  3502. `scipy.spatial.transform.Rotation.random`.
  3503. Examples
  3504. --------
  3505. >>> import numpy as np
  3506. >>> from scipy.stats import special_ortho_group
  3507. >>> x = special_ortho_group.rvs(3)
  3508. >>> np.dot(x, x.T)
  3509. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  3510. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  3511. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
  3512. >>> import scipy.linalg
  3513. >>> scipy.linalg.det(x)
  3514. 1.0
  3515. This generates one random matrix from SO(3). It is orthogonal and
  3516. has a determinant of 1.
  3517. Alternatively, the object may be called (as a function) to fix the `dim`
  3518. parameter, returning a "frozen" special_ortho_group random variable:
  3519. >>> rv = special_ortho_group(5)
  3520. >>> # Frozen object with the same methods but holding the
  3521. >>> # dimension parameter fixed.
  3522. See Also
  3523. --------
  3524. ortho_group, scipy.spatial.transform.Rotation.random
  3525. """
  3526. def __init__(self, seed=None):
  3527. super().__init__(seed)
  3528. self.__doc__ = doccer.docformat(self.__doc__)
  3529. def __call__(self, dim=None, seed=None):
  3530. """Create a frozen SO(N) distribution.
  3531. See `special_ortho_group_frozen` for more information.
  3532. """
  3533. return special_ortho_group_frozen(dim, seed=seed)
  3534. def _process_parameters(self, dim):
  3535. """Dimension N must be specified; it cannot be inferred."""
  3536. if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
  3537. raise ValueError("""Dimension of rotation must be specified,
  3538. and must be a scalar nonnegative integer.""")
  3539. return dim
  3540. def rvs(self, dim, size=1, random_state=None):
  3541. """Draw random samples from SO(N).
  3542. Parameters
  3543. ----------
  3544. dim : integer
  3545. Dimension of rotation space (N).
  3546. size : integer, optional
  3547. Number of samples to draw (default 1).
  3548. Returns
  3549. -------
  3550. rvs : ndarray or scalar
  3551. Random size N-dimensional matrices, dimension (size, dim, dim)
  3552. """
  3553. random_state = self._get_random_state(random_state)
  3554. q = ortho_group.rvs(dim, size, random_state)
  3555. dets = np.linalg.det(q)
  3556. if dim:
  3557. q[..., 0, :] /= dets[..., np.newaxis]
  3558. return q
  3559. special_ortho_group = special_ortho_group_gen()
  3560. class special_ortho_group_frozen(multi_rv_frozen):
  3561. __class_getitem__ = None
  3562. def __init__(self, dim=None, seed=None):
  3563. """Create a frozen SO(N) distribution.
  3564. Parameters
  3565. ----------
  3566. dim : scalar
  3567. Dimension of matrices
  3568. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3569. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3570. singleton is used.
  3571. If `seed` is an int, a new ``RandomState`` instance is used,
  3572. seeded with `seed`.
  3573. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3574. then that instance is used.
  3575. Examples
  3576. --------
  3577. >>> from scipy.stats import special_ortho_group
  3578. >>> g = special_ortho_group(5)
  3579. >>> x = g.rvs()
  3580. """ # numpy/numpydoc#87 # noqa: E501
  3581. self._dist = special_ortho_group_gen(seed)
  3582. self.dim = self._dist._process_parameters(dim)
  3583. def rvs(self, size=1, random_state=None):
  3584. return self._dist.rvs(self.dim, size, random_state)
  3585. class ortho_group_gen(multi_rv_generic):
  3586. r"""An Orthogonal matrix (O(N)) random variable.
  3587. Return a random orthogonal matrix, drawn from the O(N) Haar
  3588. distribution (the only uniform distribution on O(N)).
  3589. The `dim` keyword specifies the dimension N.
  3590. Methods
  3591. -------
  3592. rvs(dim=None, size=1, random_state=None)
  3593. Draw random samples from O(N).
  3594. Parameters
  3595. ----------
  3596. dim : scalar
  3597. Dimension of matrices
  3598. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  3599. Used for drawing random variates.
  3600. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  3601. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  3602. with seed.
  3603. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  3604. then that object is used.
  3605. Default is `None`.
  3606. Notes
  3607. -----
  3608. This class is closely related to `special_ortho_group`.
  3609. Some care is taken to avoid numerical error, as per the paper by Mezzadri.
  3610. References
  3611. ----------
  3612. .. [1] F. Mezzadri, "How to generate random matrices from the classical
  3613. compact groups", :arXiv:`math-ph/0609050v2`.
  3614. Examples
  3615. --------
  3616. >>> import numpy as np
  3617. >>> from scipy.stats import ortho_group
  3618. >>> x = ortho_group.rvs(3)
  3619. >>> np.dot(x, x.T)
  3620. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  3621. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  3622. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
  3623. >>> import scipy.linalg
  3624. >>> np.fabs(scipy.linalg.det(x))
  3625. 1.0
  3626. This generates one random matrix from O(3). It is orthogonal and
  3627. has a determinant of +1 or -1.
  3628. Alternatively, the object may be called (as a function) to fix the `dim`
  3629. parameter, returning a "frozen" ortho_group random variable:
  3630. >>> rv = ortho_group(5)
  3631. >>> # Frozen object with the same methods but holding the
  3632. >>> # dimension parameter fixed.
  3633. See Also
  3634. --------
  3635. special_ortho_group
  3636. """
  3637. def __init__(self, seed=None):
  3638. super().__init__(seed)
  3639. self.__doc__ = doccer.docformat(self.__doc__)
  3640. def __call__(self, dim=None, seed=None):
  3641. """Create a frozen O(N) distribution.
  3642. See `ortho_group_frozen` for more information.
  3643. """
  3644. return ortho_group_frozen(dim, seed=seed)
  3645. def _process_parameters(self, dim):
  3646. """Dimension N must be specified; it cannot be inferred."""
  3647. if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
  3648. raise ValueError("Dimension of rotation must be specified,"
  3649. "and must be a scalar nonnegative integer.")
  3650. return dim
  3651. def rvs(self, dim, size=1, random_state=None):
  3652. """Draw random samples from O(N).
  3653. Parameters
  3654. ----------
  3655. dim : integer
  3656. Dimension of rotation space (N).
  3657. size : integer, optional
  3658. Number of samples to draw (default 1).
  3659. Returns
  3660. -------
  3661. rvs : ndarray or scalar
  3662. Random size N-dimensional matrices, dimension (size, dim, dim)
  3663. """
  3664. random_state = self._get_random_state(random_state)
  3665. size = int(size)
  3666. dim = self._process_parameters(dim)
  3667. size = (size,) if size > 1 else ()
  3668. z = random_state.normal(size=size + (dim, dim))
  3669. q, r = np.linalg.qr(z)
  3670. # The last two dimensions are the rows and columns of R matrices.
  3671. # Extract the diagonals. Note that this eliminates a dimension.
  3672. d = r.diagonal(offset=0, axis1=-2, axis2=-1)
  3673. # Add back a dimension for proper broadcasting: we're dividing
  3674. # each row of each R matrix by the diagonal of the R matrix.
  3675. q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
  3676. return q
  3677. ortho_group = ortho_group_gen()
  3678. class ortho_group_frozen(multi_rv_frozen):
  3679. __class_getitem__ = None
  3680. def __init__(self, dim=None, seed=None):
  3681. """Create a frozen O(N) distribution.
  3682. Parameters
  3683. ----------
  3684. dim : scalar
  3685. Dimension of matrices
  3686. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3687. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3688. singleton is used.
  3689. If `seed` is an int, a new ``RandomState`` instance is used,
  3690. seeded with `seed`.
  3691. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3692. then that instance is used.
  3693. Examples
  3694. --------
  3695. >>> from scipy.stats import ortho_group
  3696. >>> g = ortho_group(5)
  3697. >>> x = g.rvs()
  3698. """ # numpy/numpydoc#87 # noqa: E501
  3699. self._dist = ortho_group_gen(seed)
  3700. self.dim = self._dist._process_parameters(dim)
  3701. def rvs(self, size=1, random_state=None):
  3702. return self._dist.rvs(self.dim, size, random_state)
  3703. class random_correlation_gen(multi_rv_generic):
  3704. r"""A random correlation matrix.
  3705. Return a random correlation matrix, given a vector of eigenvalues.
  3706. The returned matrix is symmetric positive semidefinite with unit diagonal.
  3707. The `eigs` keyword specifies the eigenvalues of the correlation matrix,
  3708. and implies the dimension.
  3709. Methods
  3710. -------
  3711. rvs(eigs=None, random_state=None)
  3712. Draw random correlation matrices, all with eigenvalues eigs.
  3713. Parameters
  3714. ----------
  3715. eigs : 1d ndarray
  3716. Eigenvalues of correlation matrix. All eigenvalues need to be non-negative and
  3717. need to sum to the number of eigenvalues.
  3718. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3719. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3720. singleton is used.
  3721. If `seed` is an int, a new ``RandomState`` instance is used,
  3722. seeded with `seed`.
  3723. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3724. then that instance is used.
  3725. tol : float, optional
  3726. Tolerance for input parameter checks
  3727. diag_tol : float, optional
  3728. Tolerance for deviation of the diagonal of the resulting
  3729. matrix. Default: 1e-7
  3730. Raises
  3731. ------
  3732. RuntimeError
  3733. Floating point error prevented generating a valid correlation
  3734. matrix.
  3735. Returns
  3736. -------
  3737. rvs : ndarray or scalar
  3738. Random size N-dimensional matrices, dimension (size, dim, dim),
  3739. each having eigenvalues eigs.
  3740. Notes
  3741. -----
  3742. Generates a random correlation matrix following a numerically stable
  3743. algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)
  3744. similarity transformation to construct a symmetric positive semi-definite
  3745. matrix, and applies a series of Givens rotations to scale it to have ones
  3746. on the diagonal.
  3747. References
  3748. ----------
  3749. .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation
  3750. of correlation matrices and their factors", BIT 2000, Vol. 40,
  3751. No. 4, pp. 640 651
  3752. Examples
  3753. --------
  3754. >>> import numpy as np
  3755. >>> from scipy.stats import random_correlation
  3756. >>> rng = np.random.default_rng()
  3757. >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng)
  3758. >>> x
  3759. array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ],
  3760. [-0.02423399, 1. , 0.20334736, 0.04039817],
  3761. [ 0.03130519, 0.20334736, 1. , 0.02694275],
  3762. [ 0.4946965 , 0.04039817, 0.02694275, 1. ]])
  3763. >>> import scipy.linalg
  3764. >>> e, v = scipy.linalg.eigh(x)
  3765. >>> e
  3766. array([ 0.5, 0.8, 1.2, 1.5])
  3767. """
  3768. def __init__(self, seed=None):
  3769. super().__init__(seed)
  3770. self.__doc__ = doccer.docformat(self.__doc__)
  3771. def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
  3772. """Create a frozen random correlation matrix.
  3773. See `random_correlation_frozen` for more information.
  3774. """
  3775. return random_correlation_frozen(eigs, seed=seed, tol=tol,
  3776. diag_tol=diag_tol)
  3777. def _process_parameters(self, eigs, tol):
  3778. eigs = np.asarray(eigs, dtype=float)
  3779. dim = eigs.size
  3780. if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:
  3781. raise ValueError("Array 'eigs' must be a vector of length "
  3782. "greater than 1.")
  3783. if np.fabs(np.sum(eigs) - dim) > tol:
  3784. raise ValueError("Sum of eigenvalues must equal dimensionality.")
  3785. for x in eigs:
  3786. if x < -tol:
  3787. raise ValueError("All eigenvalues must be non-negative.")
  3788. return dim, eigs
  3789. def _givens_to_1(self, aii, ajj, aij):
  3790. """Computes a 2x2 Givens matrix to put 1's on the diagonal.
  3791. The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].
  3792. The output matrix g is a 2x2 anti-symmetric matrix of the form
  3793. [ c s ; -s c ]; the elements c and s are returned.
  3794. Applying the output matrix to the input matrix (as b=g.T M g)
  3795. results in a matrix with bii=1, provided tr(M) - det(M) >= 1
  3796. and floating point issues do not occur. Otherwise, some other
  3797. valid rotation is returned. When tr(M)==2, also bjj=1.
  3798. """
  3799. aiid = aii - 1.
  3800. ajjd = ajj - 1.
  3801. if ajjd == 0:
  3802. # ajj==1, so swap aii and ajj to avoid division by zero
  3803. return 0., 1.
  3804. dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))
  3805. # The choice of t should be chosen to avoid cancellation [1]
  3806. t = (aij + math.copysign(dd, aij)) / ajjd
  3807. c = 1. / math.sqrt(1. + t*t)
  3808. if c == 0:
  3809. # Underflow
  3810. s = 1.0
  3811. else:
  3812. s = c*t
  3813. return c, s
  3814. def _to_corr(self, m):
  3815. """
  3816. Given a psd matrix m, rotate to put one's on the diagonal, turning it
  3817. into a correlation matrix. This also requires the trace equal the
  3818. dimensionality. Note: modifies input matrix
  3819. """
  3820. # Check requirements for in-place Givens
  3821. if not (m.flags.c_contiguous and m.dtype == np.float64 and
  3822. m.shape[0] == m.shape[1]):
  3823. raise ValueError()
  3824. d = m.shape[0]
  3825. for i in range(d-1):
  3826. if m[i, i] == 1:
  3827. continue
  3828. elif m[i, i] > 1:
  3829. for j in range(i+1, d):
  3830. if m[j, j] < 1:
  3831. break
  3832. else:
  3833. for j in range(i+1, d):
  3834. if m[j, j] > 1:
  3835. break
  3836. c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])
  3837. # Use BLAS to apply Givens rotations in-place. Equivalent to:
  3838. # g = np.eye(d)
  3839. # g[i, i] = g[j,j] = c
  3840. # g[j, i] = -s; g[i, j] = s
  3841. # m = np.dot(g.T, np.dot(m, g))
  3842. mv = m.ravel()
  3843. drot(mv, mv, c, -s, n=d,
  3844. offx=i*d, incx=1, offy=j*d, incy=1,
  3845. overwrite_x=True, overwrite_y=True)
  3846. drot(mv, mv, c, -s, n=d,
  3847. offx=i, incx=d, offy=j, incy=d,
  3848. overwrite_x=True, overwrite_y=True)
  3849. return m
  3850. def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):
  3851. """Draw random correlation matrices.
  3852. Parameters
  3853. ----------
  3854. eigs : 1d ndarray
  3855. Eigenvalues of correlation matrix
  3856. tol : float, optional
  3857. Tolerance for input parameter checks
  3858. diag_tol : float, optional
  3859. Tolerance for deviation of the diagonal of the resulting
  3860. matrix. Default: 1e-7
  3861. Raises
  3862. ------
  3863. RuntimeError
  3864. Floating point error prevented generating a valid correlation
  3865. matrix.
  3866. Returns
  3867. -------
  3868. rvs : ndarray or scalar
  3869. Random size N-dimensional matrices, dimension (size, dim, dim),
  3870. each having eigenvalues eigs.
  3871. """
  3872. dim, eigs = self._process_parameters(eigs, tol=tol)
  3873. random_state = self._get_random_state(random_state)
  3874. m = ortho_group.rvs(dim, random_state=random_state)
  3875. m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m
  3876. m = self._to_corr(m) # Carefully rotate to unit diagonal
  3877. # Check diagonal
  3878. if abs(m.diagonal() - 1).max() > diag_tol:
  3879. raise RuntimeError("Failed to generate a valid correlation matrix")
  3880. return m
  3881. random_correlation = random_correlation_gen()
  3882. class random_correlation_frozen(multi_rv_frozen):
  3883. __class_getitem__ = None
  3884. def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
  3885. """Create a frozen random correlation matrix distribution.
  3886. Parameters
  3887. ----------
  3888. eigs : 1d ndarray
  3889. Eigenvalues of correlation matrix
  3890. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3891. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3892. singleton is used.
  3893. If `seed` is an int, a new ``RandomState`` instance is used,
  3894. seeded with `seed`.
  3895. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3896. then that instance is used.
  3897. tol : float, optional
  3898. Tolerance for input parameter checks
  3899. diag_tol : float, optional
  3900. Tolerance for deviation of the diagonal of the resulting
  3901. matrix. Default: 1e-7
  3902. Raises
  3903. ------
  3904. RuntimeError
  3905. Floating point error prevented generating a valid correlation
  3906. matrix.
  3907. Returns
  3908. -------
  3909. rvs : ndarray or scalar
  3910. Random size N-dimensional matrices, dimension (size, dim, dim),
  3911. each having eigenvalues eigs.
  3912. """ # numpy/numpydoc#87 # noqa: E501
  3913. self._dist = random_correlation_gen(seed)
  3914. self.tol = tol
  3915. self.diag_tol = diag_tol
  3916. _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol)
  3917. def rvs(self, random_state=None):
  3918. return self._dist.rvs(self.eigs, random_state=random_state,
  3919. tol=self.tol, diag_tol=self.diag_tol)
  3920. class unitary_group_gen(multi_rv_generic):
  3921. r"""A matrix-valued U(N) random variable.
  3922. Return a random unitary matrix.
  3923. The `dim` keyword specifies the dimension N.
  3924. Methods
  3925. -------
  3926. rvs(dim=None, size=1, random_state=None)
  3927. Draw random samples from U(N).
  3928. Parameters
  3929. ----------
  3930. dim : scalar
  3931. Dimension of matrices.
  3932. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  3933. Used for drawing random variates.
  3934. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  3935. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  3936. with seed.
  3937. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  3938. then that object is used.
  3939. Default is `None`.
  3940. Notes
  3941. -----
  3942. This class is similar to `ortho_group`.
  3943. References
  3944. ----------
  3945. .. [1] F. Mezzadri, "How to generate random matrices from the classical
  3946. compact groups", :arXiv:`math-ph/0609050v2`.
  3947. Examples
  3948. --------
  3949. >>> import numpy as np
  3950. >>> from scipy.stats import unitary_group
  3951. >>> x = unitary_group.rvs(3)
  3952. >>> np.dot(x, x.conj().T)
  3953. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  3954. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  3955. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) # may vary
  3956. This generates one random matrix from U(3). The dot product confirms that
  3957. it is unitary up to machine precision.
  3958. Alternatively, the object may be called (as a function) to fix the `dim`
  3959. parameter, return a "frozen" unitary_group random variable:
  3960. >>> rv = unitary_group(5)
  3961. See Also
  3962. --------
  3963. ortho_group
  3964. """
  3965. def __init__(self, seed=None):
  3966. super().__init__(seed)
  3967. self.__doc__ = doccer.docformat(self.__doc__)
  3968. def __call__(self, dim=None, seed=None):
  3969. """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
  3970. See `unitary_group_frozen` for more information.
  3971. """
  3972. return unitary_group_frozen(dim, seed=seed)
  3973. def _process_parameters(self, dim):
  3974. """Dimension N must be specified; it cannot be inferred."""
  3975. if dim is None or not np.isscalar(dim) or dim < 0 or dim != int(dim):
  3976. raise ValueError("Dimension of rotation must be specified,"
  3977. "and must be a scalar nonnegative integer.")
  3978. return dim
  3979. def rvs(self, dim, size=1, random_state=None):
  3980. """Draw random samples from U(N).
  3981. Parameters
  3982. ----------
  3983. dim : integer
  3984. Dimension of space (N).
  3985. size : integer, optional
  3986. Number of samples to draw (default 1).
  3987. Returns
  3988. -------
  3989. rvs : ndarray or scalar
  3990. Random size N-dimensional matrices, dimension (size, dim, dim)
  3991. """
  3992. random_state = self._get_random_state(random_state)
  3993. size = int(size)
  3994. dim = self._process_parameters(dim)
  3995. size = (size,) if size > 1 else ()
  3996. z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) +
  3997. 1j*random_state.normal(size=size + (dim, dim)))
  3998. q, r = np.linalg.qr(z)
  3999. # The last two dimensions are the rows and columns of R matrices.
  4000. # Extract the diagonals. Note that this eliminates a dimension.
  4001. d = r.diagonal(offset=0, axis1=-2, axis2=-1)
  4002. # Add back a dimension for proper broadcasting: we're dividing
  4003. # each row of each R matrix by the diagonal of the R matrix.
  4004. q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
  4005. return q
# Module-level instance of `unitary_group_gen`, created with the default seed.
unitary_group = unitary_group_gen()
  4007. class unitary_group_frozen(multi_rv_frozen):
  4008. __class_getitem__ = None
  4009. def __init__(self, dim=None, seed=None):
  4010. """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
  4011. Parameters
  4012. ----------
  4013. dim : scalar
  4014. Dimension of matrices
  4015. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  4016. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  4017. singleton is used.
  4018. If `seed` is an int, a new ``RandomState`` instance is used,
  4019. seeded with `seed`.
  4020. If `seed` is already a ``Generator`` or ``RandomState`` instance
  4021. then that instance is used.
  4022. Examples
  4023. --------
  4024. >>> from scipy.stats import unitary_group
  4025. >>> x = unitary_group(3)
  4026. >>> x.rvs()
  4027. """ # numpy/numpydoc#87 # noqa: E501
  4028. self._dist = unitary_group_gen(seed)
  4029. self.dim = self._dist._process_parameters(dim)
  4030. def rvs(self, size=1, random_state=None):
  4031. return self._dist.rvs(self.dim, size, random_state)
# Shared docstring fragments for the multivariate t-distribution. They are
# substituted into ``%(name)s`` placeholders via ``doccer.docformat``.

# Parameter descriptions common to the generic (non-frozen) methods.
_mvt_doc_default_callparams = """\
loc : array_like, optional
    Location of the distribution. (default ``0``)
shape : array_like, optional
    Positive semidefinite matrix of the distribution. (default ``1``)
df : float, optional
    Degrees of freedom of the distribution; must be greater than zero.
    If ``np.inf`` then results are multivariate normal. The default is ``1``.
allow_singular : bool, optional
    Whether to allow a singular matrix. (default ``False``)
"""

# Note on how `loc` and `shape` may be specified.
_mvt_doc_callparams_note = """\
Setting the parameter `loc` to ``None`` is equivalent to having `loc`
be the zero-vector. The parameter `shape` can be a scalar, in which case
the shape matrix is the identity times that value, a vector of
diagonal entries for the shape matrix, or a two-dimensional array_like.
"""

# Replacement note used for frozen-distribution docstrings.
_mvt_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitutions for the generic distribution's docstrings.
mvt_docdict_params = {
    '_mvt_doc_default_callparams': _mvt_doc_default_callparams,
    '_mvt_doc_callparams_note': _mvt_doc_callparams_note,
    '_doc_random_state': _doc_random_state
}

# Substitutions for the frozen distribution's docstrings: the parameter
# list is blanked and the note points back to the class definition.
mvt_docdict_noparams = {
    '_mvt_doc_default_callparams': "",
    '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note,
    '_doc_random_state': _doc_random_state
}
  4061. class multivariate_t_gen(multi_rv_generic):
  4062. r"""A multivariate t-distributed random variable.
  4063. The `loc` parameter specifies the location. The `shape` parameter specifies
  4064. the positive semidefinite shape matrix. The `df` parameter specifies the
  4065. degrees of freedom.
  4066. In addition to calling the methods below, the object itself may be called
  4067. as a function to fix the location, shape matrix, and degrees of freedom
  4068. parameters, returning a "frozen" multivariate t-distribution random.
  4069. Methods
  4070. -------
  4071. pdf(x, loc=None, shape=1, df=1, allow_singular=False)
  4072. Probability density function.
  4073. logpdf(x, loc=None, shape=1, df=1, allow_singular=False)
  4074. Log of the probability density function.
  4075. cdf(x, loc=None, shape=1, df=1, allow_singular=False, *,
  4076. maxpts=None, lower_limit=None, random_state=None)
  4077. Cumulative distribution function.
  4078. rvs(loc=None, shape=1, df=1, size=1, random_state=None)
  4079. Draw random samples from a multivariate t-distribution.
  4080. entropy(loc=None, shape=1, df=1)
  4081. Differential entropy of a multivariate t-distribution.
  4082. marginal(dimensions, loc=None, shape=1, df=1, allow_singular=False)
  4083. Return a marginal multivariate t-distribution.
  4084. Parameters
  4085. ----------
  4086. %(_mvt_doc_default_callparams)s
  4087. %(_doc_random_state)s
  4088. Notes
  4089. -----
  4090. %(_mvt_doc_callparams_note)s
  4091. The matrix `shape` must be a (symmetric) positive semidefinite matrix. The
  4092. determinant and inverse of `shape` are computed as the pseudo-determinant
  4093. and pseudo-inverse, respectively, so that `shape` does not need to have
  4094. full rank.
  4095. The probability density function for `multivariate_t` is
  4096. .. math::
  4097. f(x) = \frac{\Gamma((\nu + p)/2)}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}}
  4098. \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top}
  4099. \boldsymbol{\Sigma}^{-1}
  4100. (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2},
  4101. where :math:`p` is the dimension of :math:`\mathbf{x}`,
  4102. :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location,
  4103. :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape
  4104. matrix, and :math:`\nu` is the degrees of freedom.
  4105. .. versionadded:: 1.6.0
  4106. References
  4107. ----------
  4108. .. [1] Arellano-Valle et al. "Shannon Entropy and Mutual Information for
  4109. Multivariate Skew-Elliptical Distributions". Scandinavian Journal
  4110. of Statistics. Vol. 40, issue 1.
  4111. Examples
  4112. --------
  4113. The object may be called (as a function) to fix the `loc`, `shape`,
  4114. `df`, and `allow_singular` parameters, returning a "frozen"
  4115. multivariate_t random variable:
  4116. >>> import numpy as np
  4117. >>> from scipy.stats import multivariate_t
  4118. >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2)
  4119. >>> # Frozen object with the same methods but holding the given location,
  4120. >>> # scale, and degrees of freedom fixed.
  4121. Create a contour plot of the PDF.
  4122. >>> import matplotlib.pyplot as plt
  4123. >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01]
  4124. >>> pos = np.dstack((x, y))
  4125. >>> fig, ax = plt.subplots(1, 1)
  4126. >>> ax.set_aspect('equal')
  4127. >>> plt.contourf(x, y, rv.pdf(pos))
  4128. """
  4129. def __init__(self, seed=None):
  4130. """Initialize a multivariate t-distributed random variable.
  4131. Parameters
  4132. ----------
  4133. seed : Random state.
  4134. """
  4135. super().__init__(seed)
  4136. self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params)
  4137. self._random_state = check_random_state(seed)
  4138. def __call__(self, loc=None, shape=1, df=1, allow_singular=False,
  4139. seed=None):
  4140. """Create a frozen multivariate t-distribution.
  4141. See `multivariate_t_frozen` for parameters.
  4142. """
  4143. if df == np.inf:
  4144. return multivariate_normal_frozen(mean=loc, cov=shape,
  4145. allow_singular=allow_singular,
  4146. seed=seed)
  4147. return multivariate_t_frozen(loc=loc, shape=shape, df=df,
  4148. allow_singular=allow_singular, seed=seed)
  4149. def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False):
  4150. """Multivariate t-distribution probability density function.
  4151. Parameters
  4152. ----------
  4153. x : array_like
  4154. Points at which to evaluate the probability density function.
  4155. %(_mvt_doc_default_callparams)s
  4156. Returns
  4157. -------
  4158. pdf : Probability density function evaluated at `x`.
  4159. Examples
  4160. --------
  4161. >>> from scipy.stats import multivariate_t
  4162. >>> x = [0.4, 5]
  4163. >>> loc = [0, 1]
  4164. >>> shape = [[1, 0.1], [0.1, 1]]
  4165. >>> df = 7
  4166. >>> multivariate_t.pdf(x, loc, shape, df)
  4167. 0.00075713
  4168. """
  4169. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  4170. x = self._process_quantiles(x, dim)
  4171. shape_info = _PSD(shape, allow_singular=allow_singular)
  4172. logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df,
  4173. dim, shape_info.rank)
  4174. return np.exp(logpdf)
  4175. def logpdf(self, x, loc=None, shape=1, df=1):
  4176. """Log of the multivariate t-distribution probability density function.
  4177. Parameters
  4178. ----------
  4179. x : array_like
  4180. Points at which to evaluate the log of the probability density
  4181. function.
  4182. %(_mvt_doc_default_callparams)s
  4183. Returns
  4184. -------
  4185. logpdf : Log of the probability density function evaluated at `x`.
  4186. Examples
  4187. --------
  4188. >>> from scipy.stats import multivariate_t
  4189. >>> x = [0.4, 5]
  4190. >>> loc = [0, 1]
  4191. >>> shape = [[1, 0.1], [0.1, 1]]
  4192. >>> df = 7
  4193. >>> multivariate_t.logpdf(x, loc, shape, df)
  4194. -7.1859802
  4195. See Also
  4196. --------
  4197. pdf : Probability density function.
  4198. """
  4199. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  4200. x = self._process_quantiles(x, dim)
  4201. shape_info = _PSD(shape)
  4202. cov_object = _covariance.CovViaPSD(shape_info)
  4203. return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim,
  4204. shape_info.rank, cov_object)
  4205. def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank, cov_object=None):
  4206. """Utility method `pdf`, `logpdf` for parameters.
  4207. Parameters
  4208. ----------
  4209. x : ndarray
  4210. Points at which to evaluate the log of the probability density
  4211. function.
  4212. loc : ndarray
  4213. Location of the distribution.
  4214. prec_U : ndarray
  4215. A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse
  4216. of the shape matrix.
  4217. log_pdet : float
  4218. Logarithm of the determinant of the shape matrix.
  4219. df : float
  4220. Degrees of freedom of the distribution.
  4221. dim : int
  4222. Dimension of the quantiles x.
  4223. rank : int
  4224. Rank of the shape matrix.
  4225. Notes
  4226. -----
  4227. As this function does no argument checking, it should not be called
  4228. directly; use 'logpdf' instead.
  4229. """
  4230. if df == np.inf:
  4231. return multivariate_normal._logpdf(x, loc, cov_object)
  4232. dev = x - loc
  4233. maha = np.square(np.dot(dev, prec_U)).sum(axis=-1)
  4234. t = 0.5 * (df + dim)
  4235. A = gammaln(t)
  4236. B = gammaln(0.5 * df)
  4237. C = dim/2. * np.log(df * np.pi)
  4238. D = 0.5 * log_pdet
  4239. E = -t * np.log(1 + (1./df) * maha)
  4240. return _squeeze_output(A - B - C - D + E)
  4241. def _cdf(self, x, loc, shape, df, dim, maxpts=None, lower_limit=None,
  4242. random_state=None):
  4243. # All of this - random state validation, maxpts, apply_along_axis,
  4244. # etc. needs to go in this private method unless we want
  4245. # frozen distribution's `cdf` method to duplicate it or call `cdf`,
  4246. # which would require re-processing parameters
  4247. if random_state is not None:
  4248. rng = check_random_state(random_state)
  4249. else:
  4250. rng = self._random_state
  4251. if not maxpts:
  4252. maxpts = 1000 * dim
  4253. x = self._process_quantiles(x, dim)
  4254. lower_limit = (np.full(loc.shape, -np.inf)
  4255. if lower_limit is None else lower_limit)
  4256. # remove the mean
  4257. x, lower_limit = x - loc, lower_limit - loc
  4258. b, a = np.broadcast_arrays(x, lower_limit)
  4259. i_swap = b < a
  4260. signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
  4261. a, b = a.copy(), b.copy()
  4262. a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
  4263. n = x.shape[-1]
  4264. limits = np.concatenate((a, b), axis=-1)
  4265. def func1d(limits):
  4266. a, b = limits[:n], limits[n:]
  4267. return _qmvt(maxpts, df, shape, a, b, rng)[0]
  4268. res = np.apply_along_axis(func1d, -1, limits) * signs
  4269. # Fixing the output shape for existing distributions is a separate
  4270. # issue. For now, let's keep this consistent with pdf.
  4271. return _squeeze_output(res)
  4272. def cdf(self, x, loc=None, shape=1, df=1, allow_singular=False, *,
  4273. maxpts=None, lower_limit=None, random_state=None):
  4274. """Multivariate t-distribution cumulative distribution function.
  4275. Parameters
  4276. ----------
  4277. x : array_like
  4278. Points at which to evaluate the cumulative distribution function.
  4279. %(_mvt_doc_default_callparams)s
  4280. maxpts : int, optional
  4281. Maximum number of points to use for integration. The default is
  4282. 1000 times the number of dimensions.
  4283. lower_limit : array_like, optional
  4284. Lower limit of integration of the cumulative distribution function.
  4285. Default is negative infinity. Must be broadcastable with `x`.
  4286. %(_doc_random_state)s
  4287. Returns
  4288. -------
  4289. cdf : ndarray or scalar
  4290. Cumulative distribution function evaluated at `x`.
  4291. Examples
  4292. --------
  4293. >>> from scipy.stats import multivariate_t
  4294. >>> x = [0.4, 5]
  4295. >>> loc = [0, 1]
  4296. >>> shape = [[1, 0.1], [0.1, 1]]
  4297. >>> df = 7
  4298. >>> multivariate_t.cdf(x, loc, shape, df)
  4299. 0.64798491
  4300. """
  4301. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  4302. shape = _PSD(shape, allow_singular=allow_singular)._M
  4303. return self._cdf(x, loc, shape, df, dim, maxpts,
  4304. lower_limit, random_state)
  4305. def _entropy(self, dim, df=1, shape=1):
  4306. if df == np.inf:
  4307. return multivariate_normal(None, cov=shape).entropy()
  4308. shape_info = _PSD(shape)
  4309. shape_term = 0.5 * shape_info.log_pdet
  4310. def regular(dim, df):
  4311. halfsum = 0.5 * (dim + df)
  4312. half_df = 0.5 * df
  4313. return (
  4314. -gammaln(halfsum) + gammaln(half_df)
  4315. + 0.5 * dim * np.log(df * np.pi) + halfsum
  4316. * (psi(halfsum) - psi(half_df))
  4317. + shape_term
  4318. )
  4319. def asymptotic(dim, df):
  4320. # Formula from Wolfram Alpha:
  4321. # "asymptotic expansion -gammaln((m+d)/2) + gammaln(d/2) + (m*log(d*pi))/2
  4322. # + ((m+d)/2) * (digamma((m+d)/2) - digamma(d/2))"
  4323. return (
  4324. dim * norm._entropy() + dim / df
  4325. - dim * (dim - 2) * df**-2.0 / 4
  4326. + dim**2 * (dim - 2) * df**-3.0 / 6
  4327. + dim * (-3 * dim**3 + 8 * dim**2 - 8) * df**-4.0 / 24
  4328. + dim**2 * (3 * dim**3 - 10 * dim**2 + 16) * df**-5.0 / 30
  4329. + shape_term
  4330. )[()]
  4331. # preserves ~12 digits accuracy up to at least `dim=1e5`. See gh-18465.
  4332. threshold = dim * 100 * 4 / (np.log(dim) + 1)
  4333. return xpx.apply_where(df >= threshold, (dim, df), asymptotic, regular)
  4334. def entropy(self, loc=None, shape=1, df=1):
  4335. """Calculate the differential entropy of a multivariate
  4336. t-distribution.
  4337. Parameters
  4338. ----------
  4339. %(_mvt_doc_default_callparams)s
  4340. Returns
  4341. -------
  4342. h : float
  4343. Differential entropy
  4344. """
  4345. dim, loc, shape, df = self._process_parameters(None, shape, df)
  4346. return self._entropy(dim, df, shape)
  4347. def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None):
  4348. """Draw random samples from a multivariate t-distribution.
  4349. Parameters
  4350. ----------
  4351. %(_mvt_doc_default_callparams)s
  4352. size : integer, optional
  4353. Number of samples to draw (default 1).
  4354. %(_doc_random_state)s
  4355. Returns
  4356. -------
  4357. rvs : ndarray or scalar
  4358. Random variates of size (`size`, `P`), where `P` is the
  4359. dimension of the random variable.
  4360. Examples
  4361. --------
  4362. >>> from scipy.stats import multivariate_t
  4363. >>> x = [0.4, 5]
  4364. >>> loc = [0, 1]
  4365. >>> shape = [[1, 0.1], [0.1, 1]]
  4366. >>> df = 7
  4367. >>> multivariate_t.rvs(loc, shape, df)
  4368. array([[0.93477495, 3.00408716]])
  4369. """
  4370. # For implementation details, see equation (3):
  4371. #
  4372. # Hofert, "On Sampling from the Multivariatet Distribution", 2013
  4373. # http://rjournal.github.io/archive/2013-2/hofert.pdf
  4374. #
  4375. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  4376. if random_state is not None:
  4377. rng = check_random_state(random_state)
  4378. else:
  4379. rng = self._random_state
  4380. if np.isinf(df):
  4381. x = np.ones(size)
  4382. else:
  4383. x = rng.chisquare(df, size=size) / df
  4384. z = rng.multivariate_normal(np.zeros(dim), shape, size=size)
  4385. samples = loc + z / np.sqrt(x)[..., None]
  4386. return _squeeze_output(samples)
  4387. def _process_quantiles(self, x, dim):
  4388. """
  4389. Adjust quantiles array so that last axis labels the components of
  4390. each data point.
  4391. """
  4392. x = np.asarray(x, dtype=float)
  4393. if x.ndim == 0:
  4394. x = x[np.newaxis]
  4395. elif x.ndim == 1:
  4396. if dim == 1:
  4397. x = x[:, np.newaxis]
  4398. else:
  4399. x = x[np.newaxis, :]
  4400. return x
  4401. def _process_parameters(self, loc, shape, df):
  4402. """
  4403. Infer dimensionality from location array and shape matrix, handle
  4404. defaults, and ensure compatible dimensions.
  4405. """
  4406. if loc is None and shape is None:
  4407. loc = np.asarray(0, dtype=float)
  4408. shape = np.asarray(1, dtype=float)
  4409. dim = 1
  4410. elif loc is None:
  4411. shape = np.asarray(shape, dtype=float)
  4412. if shape.ndim < 2:
  4413. dim = 1
  4414. else:
  4415. dim = shape.shape[0]
  4416. loc = np.zeros(dim)
  4417. elif shape is None:
  4418. loc = np.asarray(loc, dtype=float)
  4419. dim = loc.size
  4420. shape = np.eye(dim)
  4421. else:
  4422. shape = np.asarray(shape, dtype=float)
  4423. loc = np.asarray(loc, dtype=float)
  4424. dim = loc.size
  4425. if dim == 1:
  4426. loc = loc.reshape(1)
  4427. shape = shape.reshape(1, 1)
  4428. if loc.ndim != 1 or loc.shape[0] != dim:
  4429. raise ValueError(f"Array 'loc' must be a vector of length {dim}.")
  4430. if shape.ndim == 0:
  4431. shape = shape * np.eye(dim)
  4432. elif shape.ndim == 1:
  4433. shape = np.diag(shape)
  4434. elif shape.ndim == 2 and shape.shape != (dim, dim):
  4435. rows, cols = shape.shape
  4436. if rows != cols:
  4437. msg = ("Array 'cov' must be square if it is two dimensional,"
  4438. f" but cov.shape = {str(shape.shape)}.")
  4439. else:
  4440. msg = ("Dimension mismatch: array 'cov' is of shape %s,"
  4441. " but 'loc' is a vector of length %d.")
  4442. msg = msg % (str(shape.shape), len(loc))
  4443. raise ValueError(msg)
  4444. elif shape.ndim > 2:
  4445. raise ValueError(f"Array 'cov' must be at most two-dimensional, "
  4446. f"but cov.ndim = {shape.ndim}")
  4447. # Process degrees of freedom.
  4448. if df is None:
  4449. df = 1
  4450. elif df <= 0:
  4451. raise ValueError("'df' must be greater than zero.")
  4452. elif np.isnan(df):
  4453. raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.")
  4454. return dim, loc, shape, df
  4455. def marginal(self, dimensions, loc=None, shape=1, df=1, allow_singular=False):
  4456. """Return a marginal multivariate t-distribution.
  4457. Parameters
  4458. ----------
  4459. dimensions : int or 1-d array_like
  4460. The dimensions of the multivariate t corresponding
  4461. with the marginal variables, that is, the indices of the dimensions
  4462. that are being retained. The other dimensions are marginalized out.
  4463. %(_mvt_doc_default_callparams)s
  4464. Returns
  4465. -------
  4466. marginal_multivariate_t : multivariate_t_frozen
  4467. An object representing the marginal t-distribution.
  4468. Notes
  4469. -----
  4470. %(_mvt_doc_frozen_callparams_note)s
  4471. """
  4472. params = self._process_parameters(loc, shape, df)
  4473. n, loc, shape, df = params
  4474. dims = _validate_marginal_input(dimensions, n)
  4475. loc = loc[dims]
  4476. shape = shape[np.ix_(dims, dims)]
  4477. return multivariate_t_frozen(loc, shape, df, allow_singular)
  4478. class multivariate_t_frozen(multi_rv_frozen):
  4479. __class_getitem__ = None
  4480. def __init__(self, loc=None, shape=1, df=1, allow_singular=False,
  4481. seed=None):
  4482. """Create a frozen multivariate t distribution.
  4483. Parameters
  4484. ----------
  4485. %(_mvt_doc_default_callparams)s
  4486. Examples
  4487. --------
  4488. >>> import numpy as np
  4489. >>> from scipy.stats import multivariate_t
  4490. >>> loc = np.zeros(3)
  4491. >>> shape = np.eye(3)
  4492. >>> df = 10
  4493. >>> dist = multivariate_t(loc, shape, df)
  4494. >>> dist.rvs()
  4495. array([[ 0.81412036, -1.53612361, 0.42199647]])
  4496. >>> dist.pdf([1, 1, 1])
  4497. array([0.01237803])
  4498. """
  4499. self._dist = multivariate_t_gen(seed)
  4500. dim, loc, shape, df = self._dist._process_parameters(loc, shape, df)
  4501. self.dim, self.loc, self.shape, self.df = dim, loc, shape, df
  4502. self.shape_info = _PSD(shape, allow_singular=allow_singular)
  4503. self.allow_singular = allow_singular
  4504. def logpdf(self, x):
  4505. x = self._dist._process_quantiles(x, self.dim)
  4506. U = self.shape_info.U
  4507. log_pdet = self.shape_info.log_pdet
  4508. return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim,
  4509. self.shape_info.rank)
  4510. def cdf(self, x, *, maxpts=None, lower_limit=None, random_state=None):
  4511. x = self._dist._process_quantiles(x, self.dim)
  4512. return self._dist._cdf(x, self.loc, self.shape, self.df, self.dim,
  4513. maxpts, lower_limit, random_state)
  4514. def pdf(self, x):
  4515. return np.exp(self.logpdf(x))
  4516. def rvs(self, size=1, random_state=None):
  4517. return self._dist.rvs(loc=self.loc,
  4518. shape=self.shape,
  4519. df=self.df,
  4520. size=size,
  4521. random_state=random_state)
  4522. def entropy(self):
  4523. return self._dist._entropy(self.dim, self.df, self.shape)
  4524. def marginal(self, dimensions):
  4525. return self._dist.marginal(dimensions, self.loc,
  4526. self.shape, self.df, self.allow_singular)
# Module-level instance of `multivariate_t_gen`, created with the default seed.
multivariate_t = multivariate_t_gen()


# Set frozen generator docstrings from corresponding docstrings in
# multivariate_t_gen and fill in default strings in class docstrings
for name in ['logpdf', 'pdf', 'rvs', 'cdf', 'entropy']:
    method = multivariate_t_gen.__dict__[name]
    method_frozen = multivariate_t_frozen.__dict__[name]
    # Order matters: the frozen docstring must be derived from the still
    # unformatted template before the generic method's docstring is
    # overwritten with its formatted version below.
    method_frozen.__doc__ = doccer.docformat(method.__doc__,
                                             mvt_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params)
# Shared docstring fragments for the multivariate hypergeometric
# distribution. They are substituted into ``%(name)s`` placeholders via
# ``doccer.docformat``.

# Parameter descriptions common to the generic (non-frozen) methods.
_mhg_doc_default_callparams = """\
m : array_like
    The number of each type of object in the population.
    That is, :math:`m[i]` is the number of objects of
    type :math:`i`.
n : array_like
    The number of samples taken from the population.
"""

# Note describing how invalid quantiles and parameters are reported.
_mhg_doc_callparams_note = """\
`m` must be an array of positive integers. If the quantile
:math:`i` contains values out of the range :math:`[0, m_i]`
where :math:`m_i` is the number of objects of type :math:`i`
in the population or if the parameters are inconsistent with one
another (e.g. ``x.sum() != n``), methods return the appropriate
value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative
values, the result will contain ``nan`` there.
"""

# Frozen distributions take no call parameters, so the list is blank.
_mhg_doc_frozen_callparams = ""

# Replacement note used for frozen-distribution docstrings.
_mhg_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitutions for the generic distribution's docstrings.
mhg_docdict_params = {
    '_doc_default_callparams': _mhg_doc_default_callparams,
    '_doc_callparams_note': _mhg_doc_callparams_note,
    '_doc_random_state': _doc_random_state
}

# Substitutions for the frozen distribution's docstrings.
mhg_docdict_noparams = {
    '_doc_default_callparams': _mhg_doc_frozen_callparams,
    '_doc_callparams_note': _mhg_doc_frozen_callparams_note,
    '_doc_random_state': _doc_random_state
}
  4566. class multivariate_hypergeom_gen(multi_rv_generic):
  4567. r"""A multivariate hypergeometric random variable.
  4568. Methods
  4569. -------
  4570. pmf(x, m, n)
  4571. Probability mass function.
  4572. logpmf(x, m, n)
  4573. Log of the probability mass function.
  4574. rvs(m, n, size=1, random_state=None)
  4575. Draw random samples from a multivariate hypergeometric
  4576. distribution.
  4577. mean(m, n)
  4578. Mean of the multivariate hypergeometric distribution.
  4579. var(m, n)
  4580. Variance of the multivariate hypergeometric distribution.
  4581. cov(m, n)
  4582. Compute the covariance matrix of the multivariate
  4583. hypergeometric distribution.
  4584. Parameters
  4585. ----------
  4586. %(_doc_default_callparams)s
  4587. %(_doc_random_state)s
  4588. Notes
  4589. -----
  4590. %(_doc_callparams_note)s
  4591. The probability mass function for `multivariate_hypergeom` is
  4592. .. math::
  4593. P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1}
  4594. \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad
  4595. (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with }
  4596. \sum_{i=1}^k x_i = n
  4597. where :math:`m_i` are the number of objects of type :math:`i`, :math:`M`
  4598. is the total number of objects in the population (sum of all the
  4599. :math:`m_i`), and :math:`n` is the size of the sample to be taken
  4600. from the population.
  4601. .. versionadded:: 1.6.0
  4602. Examples
  4603. --------
  4604. To evaluate the probability mass function of the multivariate
  4605. hypergeometric distribution, with a dichotomous population of size
  4606. :math:`10` and :math:`20`, at a sample of size :math:`12` with
  4607. :math:`8` objects of the first type and :math:`4` objects of the
  4608. second type, use:
  4609. >>> from scipy.stats import multivariate_hypergeom
  4610. >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12)
  4611. 0.0025207176631464523
  4612. The `multivariate_hypergeom` distribution is identical to the
  4613. corresponding `hypergeom` distribution (tiny numerical differences
  4614. notwithstanding) when only two types (good and bad) of objects
  4615. are present in the population as in the example above. Consider
  4616. another example for a comparison with the hypergeometric distribution:
  4617. >>> from scipy.stats import hypergeom
  4618. >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)
  4619. 0.4395604395604395
  4620. >>> hypergeom.pmf(k=3, M=15, n=4, N=10)
  4621. 0.43956043956044005
  4622. The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs``
  4623. support broadcasting, under the convention that the vector parameters
  4624. (``x``, ``m``, and ``n``) are interpreted as if each row along the last
  4625. axis is a single object. For instance, we can combine the previous two
  4626. calls to `multivariate_hypergeom` as
  4627. >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]],
  4628. ... n=[12, 4])
  4629. array([0.00252072, 0.43956044])
  4630. This broadcasting also works for ``cov``, where the output objects are
  4631. square matrices of size ``m.shape[-1]``. For example:
  4632. >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])
  4633. array([[[ 1.05, -1.05],
  4634. [-1.05, 1.05]],
  4635. [[ 1.56, -1.56],
  4636. [-1.56, 1.56]]])
  4637. That is, ``result[0]`` is equal to
  4638. ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal
  4639. to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``.
  4640. Alternatively, the object may be called (as a function) to fix the `m`
  4641. and `n` parameters, returning a "frozen" multivariate hypergeometric
  4642. random variable.
  4643. >>> rv = multivariate_hypergeom(m=[10, 20], n=12)
  4644. >>> rv.pmf(x=[8, 4])
  4645. 0.0025207176631464523
  4646. See Also
  4647. --------
  4648. scipy.stats.hypergeom : The hypergeometric distribution.
  4649. scipy.stats.multinomial : The multinomial distribution.
  4650. References
  4651. ----------
  4652. .. [1] The Multivariate Hypergeometric Distribution,
  4653. http://www.randomservices.org/random/urn/MultiHypergeometric.html
  4654. .. [2] Thomas J. Sargent and John Stachurski, 2020,
  4655. Multivariate Hypergeometric Distribution
  4656. https://python.quantecon.org/multi_hyper.html
  4657. """
  4658. def __init__(self, seed=None):
  4659. super().__init__(seed)
  4660. self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params)
  4661. def __call__(self, m, n, seed=None):
  4662. """Create a frozen multivariate_hypergeom distribution.
  4663. See `multivariate_hypergeom_frozen` for more information.
  4664. """
  4665. return multivariate_hypergeom_frozen(m, n, seed=seed)
  4666. def _process_parameters(self, m, n):
  4667. m = np.asarray(m)
  4668. n = np.asarray(n)
  4669. if m.size == 0:
  4670. m = m.astype(int)
  4671. if n.size == 0:
  4672. n = n.astype(int)
  4673. if not np.issubdtype(m.dtype, np.integer):
  4674. raise TypeError("'m' must an array of integers.")
  4675. if not np.issubdtype(n.dtype, np.integer):
  4676. raise TypeError("'n' must an array of integers.")
  4677. if m.ndim == 0:
  4678. raise ValueError("'m' must be an array with"
  4679. " at least one dimension.")
  4680. # check for empty arrays
  4681. if m.size != 0:
  4682. n = n[..., np.newaxis]
  4683. m, n = np.broadcast_arrays(m, n)
  4684. # check for empty arrays
  4685. if m.size != 0:
  4686. n = n[..., 0]
  4687. mcond = m < 0
  4688. M = m.sum(axis=-1)
  4689. ncond = (n < 0) | (n > M)
  4690. return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond
  4691. def _process_quantiles(self, x, M, m, n):
  4692. x = np.asarray(x)
  4693. if not np.issubdtype(x.dtype, np.integer):
  4694. raise TypeError("'x' must an array of integers.")
  4695. if x.ndim == 0:
  4696. raise ValueError("'x' must be an array with"
  4697. " at least one dimension.")
  4698. if not x.shape[-1] == m.shape[-1]:
  4699. raise ValueError(f"Size of each quantile must be size of 'm': "
  4700. f"received {x.shape[-1]}, "
  4701. f"but expected {m.shape[-1]}.")
  4702. # check for empty arrays
  4703. if m.size != 0:
  4704. n = n[..., np.newaxis]
  4705. M = M[..., np.newaxis]
  4706. x, m, n, M = np.broadcast_arrays(x, m, n, M)
  4707. # check for empty arrays
  4708. if m.size != 0:
  4709. n, M = n[..., 0], M[..., 0]
  4710. xcond = (x < 0) | (x > m)
  4711. return (x, M, m, n, xcond,
  4712. np.any(xcond, axis=-1) | (x.sum(axis=-1) != n))
  4713. def _checkresult(self, result, cond, bad_value):
  4714. result = np.asarray(result)
  4715. if cond.ndim != 0:
  4716. result[cond] = bad_value
  4717. elif cond:
  4718. return bad_value
  4719. if result.ndim == 0:
  4720. return result[()]
  4721. return result
  4722. def _logpmf(self, x, M, m, n, mxcond, ncond):
  4723. # This equation of the pmf comes from the relation,
  4724. # n combine r = beta(n+1, 1) / beta(r+1, n-r+1)
  4725. num = np.zeros_like(m, dtype=np.float64)
  4726. den = np.zeros_like(n, dtype=np.float64)
  4727. m, x = m[~mxcond], x[~mxcond]
  4728. M, n = M[~ncond], n[~ncond]
  4729. num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1))
  4730. den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1))
  4731. num[mxcond] = np.nan
  4732. den[ncond] = np.nan
  4733. num = num.sum(axis=-1)
  4734. return num - den
  4735. def logpmf(self, x, m, n):
  4736. """Log of the multivariate hypergeometric probability mass function.
  4737. Parameters
  4738. ----------
  4739. x : array_like
  4740. Quantiles, with the last axis of `x` denoting the components.
  4741. %(_doc_default_callparams)s
  4742. Returns
  4743. -------
  4744. logpmf : ndarray or scalar
  4745. Log of the probability mass function evaluated at `x`
  4746. Notes
  4747. -----
  4748. %(_doc_callparams_note)s
  4749. """
  4750. M, m, n, mcond, ncond, mncond = self._process_parameters(m, n)
  4751. (x, M, m, n, xcond,
  4752. xcond_reduced) = self._process_quantiles(x, M, m, n)
  4753. mxcond = mcond | xcond
  4754. ncond = ncond | np.zeros(n.shape, dtype=np.bool_)
  4755. result = self._logpmf(x, M, m, n, mxcond, ncond)
  4756. # replace values for which x was out of the domain; broadcast
  4757. # xcond to the right shape
  4758. xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_)
  4759. result = self._checkresult(result, xcond_, -np.inf)
  4760. # replace values bad for n or m; broadcast
  4761. # mncond to the right shape
  4762. mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_)
  4763. return self._checkresult(result, mncond_, np.nan)
  4764. def pmf(self, x, m, n):
  4765. """Multivariate hypergeometric probability mass function.
  4766. Parameters
  4767. ----------
  4768. x : array_like
  4769. Quantiles, with the last axis of `x` denoting the components.
  4770. %(_doc_default_callparams)s
  4771. Returns
  4772. -------
  4773. pmf : ndarray or scalar
  4774. Probability density function evaluated at `x`
  4775. Notes
  4776. -----
  4777. %(_doc_callparams_note)s
  4778. """
  4779. out = np.exp(self.logpmf(x, m, n))
  4780. return out
  4781. def mean(self, m, n):
  4782. """Mean of the multivariate hypergeometric distribution.
  4783. Parameters
  4784. ----------
  4785. %(_doc_default_callparams)s
  4786. Returns
  4787. -------
  4788. mean : array_like or scalar
  4789. The mean of the distribution
  4790. """
  4791. M, m, n, _, _, mncond = self._process_parameters(m, n)
  4792. # check for empty arrays
  4793. if m.size != 0:
  4794. M, n = M[..., np.newaxis], n[..., np.newaxis]
  4795. cond = (M == 0)
  4796. M = np.ma.masked_array(M, mask=cond)
  4797. mu = n*(m/M)
  4798. if m.size != 0:
  4799. mncond = (mncond[..., np.newaxis] |
  4800. np.zeros(mu.shape, dtype=np.bool_))
  4801. return self._checkresult(mu, mncond, np.nan)
  4802. def var(self, m, n):
  4803. """Variance of the multivariate hypergeometric distribution.
  4804. Parameters
  4805. ----------
  4806. %(_doc_default_callparams)s
  4807. Returns
  4808. -------
  4809. array_like
  4810. The variances of the components of the distribution. This is
  4811. the diagonal of the covariance matrix of the distribution
  4812. """
  4813. M, m, n, _, _, mncond = self._process_parameters(m, n)
  4814. # check for empty arrays
  4815. if m.size != 0:
  4816. M, n = M[..., np.newaxis], n[..., np.newaxis]
  4817. cond = (M == 0) & (M-1 == 0)
  4818. M = np.ma.masked_array(M, mask=cond)
  4819. output = n * m/M * (M-m)/M * (M-n)/(M-1)
  4820. if m.size != 0:
  4821. mncond = (mncond[..., np.newaxis] |
  4822. np.zeros(output.shape, dtype=np.bool_))
  4823. return self._checkresult(output, mncond, np.nan)
    def cov(self, m, n):
        """Covariance matrix of the multivariate hypergeometric distribution.

        Parameters
        ----------
        %(_doc_default_callparams)s

        Returns
        -------
        cov : array_like
            The covariance matrix of the distribution
        """
        # see [1]_ for the formula and [2]_ for implementation
        # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2)
        M, m, n, _, _, mncond = self._process_parameters(m, n)
        # check for empty arrays
        if m.size != 0:
            # two trailing axes so M and n broadcast against the matrix axes
            M = M[..., np.newaxis, np.newaxis]
            n = n[..., np.newaxis, np.newaxis]
        # NOTE(review): both comparisons cannot hold for the same entry, so
        # this mask is all False; M is still wrapped in a masked array.
        cond = (M == 0) & (M-1 == 0)
        M = np.ma.masked_array(M, mask=cond)
        # off-diagonal formula, evaluated for every (i, j) pair; the einsum
        # builds the outer product of m with itself over the last axis
        output = (-n * (M-n)/(M-1) *
                  np.einsum("...i,...j->...ij", m, m) / (M**2))
        # check for empty arrays
        if m.size != 0:
            # strip the helper axes again for the per-component update below
            M, n = M[..., 0, 0], n[..., 0, 0]
            cond = cond[..., 0, 0]
        dim = m.shape[-1]
        # diagonal entries need to be computed differently
        for i in range(dim):
            # built in three steps: numerator, then the two divisions
            output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i]))
            output[..., i, i] = output[..., i, i] / (M-1)
            output[..., i, i] = output[..., i, i] / (M**2)
        if m.size != 0:
            # expand the parameter mask to one flag per matrix entry
            mncond = (mncond[..., np.newaxis, np.newaxis] |
                      np.zeros(output.shape, dtype=np.bool_))
        return self._checkresult(output, mncond, np.nan)
  4859. def rvs(self, m, n, size=None, random_state=None):
  4860. """Draw random samples from a multivariate hypergeometric distribution.
  4861. Parameters
  4862. ----------
  4863. %(_doc_default_callparams)s
  4864. size : integer or iterable of integers, optional
  4865. Number of samples to draw. Default is ``None``, in which case a
  4866. single variate is returned as an array with shape ``m.shape``.
  4867. %(_doc_random_state)s
  4868. Returns
  4869. -------
  4870. rvs : array_like
  4871. Random variates of shape ``size`` or ``m.shape``
  4872. (if ``size=None``).
  4873. Notes
  4874. -----
  4875. %(_doc_callparams_note)s
  4876. Also note that NumPy's `multivariate_hypergeometric` sampler is not
  4877. used as it doesn't support broadcasting.
  4878. """
  4879. M, m, n, _, _, _ = self._process_parameters(m, n)
  4880. random_state = self._get_random_state(random_state)
  4881. if size is not None and isinstance(size, int):
  4882. size = (size, )
  4883. if size is None:
  4884. rvs = np.empty(m.shape, dtype=m.dtype)
  4885. else:
  4886. rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype)
  4887. rem = M
  4888. # This sampler has been taken from numpy gh-13794
  4889. # https://github.com/numpy/numpy/pull/13794
  4890. for c in range(m.shape[-1] - 1):
  4891. rem = rem - m[..., c]
  4892. n0mask = n == 0
  4893. rvs[..., c] = (~n0mask *
  4894. random_state.hypergeometric(m[..., c],
  4895. rem + n0mask,
  4896. n + n0mask,
  4897. size=size))
  4898. n = n - rvs[..., c]
  4899. rvs[..., m.shape[-1] - 1] = n
  4900. return rvs
# Module-level instance of the generator class; its methods take the
# parameters `m` and `n` explicitly on every call.
multivariate_hypergeom = multivariate_hypergeom_gen()
  4902. class multivariate_hypergeom_frozen(multi_rv_frozen):
  4903. def __init__(self, m, n, seed=None):
  4904. self._dist = multivariate_hypergeom_gen(seed)
  4905. (self.M, self.m, self.n,
  4906. self.mcond, self.ncond,
  4907. self.mncond) = self._dist._process_parameters(m, n)
  4908. # monkey patch self._dist
  4909. def _process_parameters(m, n):
  4910. return (self.M, self.m, self.n,
  4911. self.mcond, self.ncond,
  4912. self.mncond)
  4913. self._dist._process_parameters = _process_parameters
  4914. def logpmf(self, x):
  4915. return self._dist.logpmf(x, self.m, self.n)
  4916. def pmf(self, x):
  4917. return self._dist.pmf(x, self.m, self.n)
  4918. def mean(self):
  4919. return self._dist.mean(self.m, self.n)
  4920. def var(self):
  4921. return self._dist.var(self.m, self.n)
  4922. def cov(self):
  4923. return self._dist.cov(self.m, self.n)
  4924. def rvs(self, size=1, random_state=None):
  4925. return self._dist.rvs(self.m, self.n,
  4926. size=size,
  4927. random_state=random_state)
# Set frozen generator docstrings from corresponding docstrings in
# multivariate_hypergeom and fill in default strings in class docstrings
for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']:
    method = multivariate_hypergeom_gen.__dict__[name]
    method_frozen = multivariate_hypergeom_frozen.__dict__[name]
    # The frozen docstring is rendered from the still unformatted template,
    # so it must be assigned before the template itself is overwritten.
    method_frozen.__doc__ = doccer.docformat(
        method.__doc__, mhg_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__,
                                      mhg_docdict_params)
  4937. class random_table_gen(multi_rv_generic):
  4938. r"""Contingency tables from independent samples with fixed marginal sums.
  4939. This is the distribution of random tables with given row and column vector
  4940. sums. This distribution represents the set of random tables under the null
  4941. hypothesis that rows and columns are independent. It is used in hypothesis
  4942. tests of independence.
  4943. Because of assumed independence, the expected frequency of each table
  4944. element can be computed from the row and column sums, so that the
  4945. distribution is completely determined by these two vectors.
  4946. Methods
  4947. -------
  4948. logpmf(x)
  4949. Log-probability of table `x` to occur in the distribution.
  4950. pmf(x)
  4951. Probability of table `x` to occur in the distribution.
  4952. mean(row, col)
  4953. Mean table.
  4954. rvs(row, col, size=None, method=None, random_state=None)
  4955. Draw random tables with given row and column vector sums.
  4956. Parameters
  4957. ----------
  4958. %(_doc_row_col)s
  4959. %(_doc_random_state)s
  4960. Notes
  4961. -----
  4962. %(_doc_row_col_note)s
  4963. Random elements from the distribution are generated either with Boyett's
  4964. [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has
  4965. O(N) time and space complexity, where N is the total sum of entries in the
  4966. table. Patefield's algorithm has O(K x log(N)) time complexity, where K is
  4967. the number of cells in the table and requires only a small constant work
  4968. space. By default, the `rvs` method selects the fastest algorithm based on
  4969. the input, but you can specify the algorithm with the keyword `method`.
  4970. Allowed values are "boyett" and "patefield".
  4971. .. versionadded:: 1.10.0
  4972. Examples
  4973. --------
  4974. >>> from scipy.stats import random_table
  4975. >>> row = [1, 5]
  4976. >>> col = [2, 3, 1]
  4977. >>> random_table.mean(row, col)
  4978. array([[0.33333333, 0.5 , 0.16666667],
  4979. [1.66666667, 2.5 , 0.83333333]])
  4980. Alternatively, the object may be called (as a function) to fix the row
  4981. and column vector sums, returning a "frozen" distribution.
  4982. >>> dist = random_table(row, col)
  4983. >>> dist.rvs(random_state=123)
  4984. array([[1, 0, 0],
  4985. [1, 3, 1]])
  4986. References
  4987. ----------
  4988. .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332
  4989. .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97
  4990. """
  4991. def __init__(self, seed=None):
  4992. super().__init__(seed)
  4993. def __call__(self, row, col, *, seed=None):
  4994. """Create a frozen distribution of tables with given marginals.
  4995. See `random_table_frozen` for more information.
  4996. """
  4997. return random_table_frozen(row, col, seed=seed)
  4998. def logpmf(self, x, row, col):
  4999. """Log-probability of table to occur in the distribution.
  5000. Parameters
  5001. ----------
  5002. %(_doc_x)s
  5003. %(_doc_row_col)s
  5004. Returns
  5005. -------
  5006. logpmf : ndarray or scalar
  5007. Log of the probability mass function evaluated at `x`.
  5008. Notes
  5009. -----
  5010. %(_doc_row_col_note)s
  5011. If row and column marginals of `x` do not match `row` and `col`,
  5012. negative infinity is returned.
  5013. Examples
  5014. --------
  5015. >>> from scipy.stats import random_table
  5016. >>> import numpy as np
  5017. >>> x = [[1, 5, 1], [2, 3, 1]]
  5018. >>> row = np.sum(x, axis=1)
  5019. >>> col = np.sum(x, axis=0)
  5020. >>> random_table.logpmf(x, row, col)
  5021. -1.6306401200847027
  5022. Alternatively, the object may be called (as a function) to fix the row
  5023. and column vector sums, returning a "frozen" distribution.
  5024. >>> d = random_table(row, col)
  5025. >>> d.logpmf(x)
  5026. -1.6306401200847027
  5027. """
  5028. r, c, n = self._process_parameters(row, col)
  5029. x = np.asarray(x)
  5030. if x.ndim < 2:
  5031. raise ValueError("`x` must be at least two-dimensional")
  5032. dtype_is_int = np.issubdtype(x.dtype, np.integer)
  5033. with np.errstate(invalid='ignore'):
  5034. if not dtype_is_int and not np.all(x.astype(int) == x):
  5035. raise ValueError("`x` must contain only integral values")
  5036. # x does not contain NaN if we arrive here
  5037. if np.any(x < 0):
  5038. raise ValueError("`x` must contain only non-negative values")
  5039. r2 = np.sum(x, axis=-1)
  5040. c2 = np.sum(x, axis=-2)
  5041. if r2.shape[-1] != len(r):
  5042. raise ValueError("shape of `x` must agree with `row`")
  5043. if c2.shape[-1] != len(c):
  5044. raise ValueError("shape of `x` must agree with `col`")
  5045. res = np.empty(x.shape[:-2])
  5046. mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1)
  5047. def lnfac(x):
  5048. return gammaln(x + 1)
  5049. res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1)
  5050. - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2)))
  5051. res[~mask] = -np.inf
  5052. return res[()]
  5053. def pmf(self, x, row, col):
  5054. """Probability of table to occur in the distribution.
  5055. Parameters
  5056. ----------
  5057. %(_doc_x)s
  5058. %(_doc_row_col)s
  5059. Returns
  5060. -------
  5061. pmf : ndarray or scalar
  5062. Probability mass function evaluated at `x`.
  5063. Notes
  5064. -----
  5065. %(_doc_row_col_note)s
  5066. If row and column marginals of `x` do not match `row` and `col`,
  5067. zero is returned.
  5068. Examples
  5069. --------
  5070. >>> from scipy.stats import random_table
  5071. >>> import numpy as np
  5072. >>> x = [[1, 5, 1], [2, 3, 1]]
  5073. >>> row = np.sum(x, axis=1)
  5074. >>> col = np.sum(x, axis=0)
  5075. >>> random_table.pmf(x, row, col)
  5076. 0.19580419580419592
  5077. Alternatively, the object may be called (as a function) to fix the row
  5078. and column vector sums, returning a "frozen" distribution.
  5079. >>> d = random_table(row, col)
  5080. >>> d.pmf(x)
  5081. 0.19580419580419592
  5082. """
  5083. return np.exp(self.logpmf(x, row, col))
  5084. def mean(self, row, col):
  5085. """Mean of distribution of conditional tables.
  5086. %(_doc_mean_params)s
  5087. Returns
  5088. -------
  5089. mean: ndarray
  5090. Mean of the distribution.
  5091. Notes
  5092. -----
  5093. %(_doc_row_col_note)s
  5094. Examples
  5095. --------
  5096. >>> from scipy.stats import random_table
  5097. >>> row = [1, 5]
  5098. >>> col = [2, 3, 1]
  5099. >>> random_table.mean(row, col)
  5100. array([[0.33333333, 0.5 , 0.16666667],
  5101. [1.66666667, 2.5 , 0.83333333]])
  5102. Alternatively, the object may be called (as a function) to fix the row
  5103. and column vector sums, returning a "frozen" distribution.
  5104. >>> d = random_table(row, col)
  5105. >>> d.mean()
  5106. array([[0.33333333, 0.5 , 0.16666667],
  5107. [1.66666667, 2.5 , 0.83333333]])
  5108. """
  5109. r, c, n = self._process_parameters(row, col)
  5110. return np.outer(r, c) / n
  5111. def rvs(self, row, col, *, size=None, method=None, random_state=None):
  5112. """Draw random tables with fixed column and row marginals.
  5113. Parameters
  5114. ----------
  5115. %(_doc_row_col)s
  5116. size : integer, optional
  5117. Number of samples to draw (default 1).
  5118. method : str, optional
  5119. Which method to use, "boyett" or "patefield". If None (default),
  5120. selects the fastest method for this input.
  5121. %(_doc_random_state)s
  5122. Returns
  5123. -------
  5124. rvs : ndarray
  5125. Random 2D tables of shape (`size`, `len(row)`, `len(col)`).
  5126. Notes
  5127. -----
  5128. %(_doc_row_col_note)s
  5129. Examples
  5130. --------
  5131. >>> from scipy.stats import random_table
  5132. >>> row = [1, 5]
  5133. >>> col = [2, 3, 1]
  5134. >>> random_table.rvs(row, col, random_state=123)
  5135. array([[1., 0., 0.],
  5136. [1., 3., 1.]])
  5137. Alternatively, the object may be called (as a function) to fix the row
  5138. and column vector sums, returning a "frozen" distribution.
  5139. >>> d = random_table(row, col)
  5140. >>> d.rvs(random_state=123)
  5141. array([[1., 0., 0.],
  5142. [1., 3., 1.]])
  5143. """
  5144. r, c, n = self._process_parameters(row, col)
  5145. size, shape = self._process_size_shape(size, r, c)
  5146. random_state = self._get_random_state(random_state)
  5147. meth = self._process_rvs_method(method, r, c, n)
  5148. return meth(r, c, n, size, random_state).reshape(shape)
  5149. @staticmethod
  5150. def _process_parameters(row, col):
  5151. """
  5152. Check that row and column vectors are one-dimensional, that they do
  5153. not contain negative or non-integer entries, and that the sums over
  5154. both vectors are equal.
  5155. """
  5156. r = np.array(row, dtype=np.int64, copy=True)
  5157. c = np.array(col, dtype=np.int64, copy=True)
  5158. if np.ndim(r) != 1:
  5159. raise ValueError("`row` must be one-dimensional")
  5160. if np.ndim(c) != 1:
  5161. raise ValueError("`col` must be one-dimensional")
  5162. if np.any(r < 0):
  5163. raise ValueError("each element of `row` must be non-negative")
  5164. if np.any(c < 0):
  5165. raise ValueError("each element of `col` must be non-negative")
  5166. n = np.sum(r)
  5167. if n != np.sum(c):
  5168. raise ValueError("sums over `row` and `col` must be equal")
  5169. if not np.all(r == np.asarray(row)):
  5170. raise ValueError("each element of `row` must be an integer")
  5171. if not np.all(c == np.asarray(col)):
  5172. raise ValueError("each element of `col` must be an integer")
  5173. return r, c, n
  5174. @staticmethod
  5175. def _process_size_shape(size, r, c):
  5176. """
  5177. Compute the number of samples to be drawn and the shape of the output
  5178. """
  5179. shape = (len(r), len(c))
  5180. if size is None:
  5181. return 1, shape
  5182. size = np.atleast_1d(size)
  5183. if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0):
  5184. raise ValueError("`size` must be a non-negative integer or `None`")
  5185. return np.prod(size), tuple(size) + shape
  5186. @classmethod
  5187. def _process_rvs_method(cls, method, r, c, n):
  5188. known_methods = {
  5189. None: cls._rvs_select(r, c, n),
  5190. "boyett": cls._rvs_boyett,
  5191. "patefield": cls._rvs_patefield,
  5192. }
  5193. try:
  5194. return known_methods[method]
  5195. except KeyError:
  5196. raise ValueError(f"'{method}' not recognized, "
  5197. f"must be one of {set(known_methods)}")
  5198. @classmethod
  5199. def _rvs_select(cls, r, c, n):
  5200. fac = 1.0 # benchmarks show that this value is about 1
  5201. k = len(r) * len(c) # number of cells
  5202. # n + 1 guards against failure if n == 0
  5203. if n > fac * np.log(n + 1) * k:
  5204. return cls._rvs_patefield
  5205. return cls._rvs_boyett
  5206. @staticmethod
  5207. def _rvs_boyett(row, col, ntot, size, random_state):
  5208. return _rcont.rvs_rcont1(row, col, ntot, size, random_state)
  5209. @staticmethod
  5210. def _rvs_patefield(row, col, ntot, size, random_state):
  5211. return _rcont.rvs_rcont2(row, col, ntot, size, random_state)
# Module-level instance of the generator class; its methods take the
# marginals `row` and `col` explicitly on every call.
random_table = random_table_gen()
  5213. class random_table_frozen(multi_rv_frozen):
  5214. __class_getitem__ = None
  5215. def __init__(self, row, col, *, seed=None):
  5216. self._dist = random_table_gen(seed)
  5217. self._params = self._dist._process_parameters(row, col)
  5218. # monkey patch self._dist
  5219. def _process_parameters(r, c):
  5220. return self._params
  5221. self._dist._process_parameters = _process_parameters
  5222. def logpmf(self, x):
  5223. return self._dist.logpmf(x, None, None)
  5224. def pmf(self, x):
  5225. return self._dist.pmf(x, None, None)
  5226. def mean(self):
  5227. return self._dist.mean(None, None)
  5228. def rvs(self, size=None, method=None, random_state=None):
  5229. # optimisations are possible here
  5230. return self._dist.rvs(None, None, size=size, method=method,
  5231. random_state=random_state)
# Documentation fragments for the random-table distribution.  They are
# substituted for the %(...)s placeholders of the docstrings through the two
# dictionaries defined at the end of this section.

# Parameter entries for the two marginal vectors.
_ctab_doc_row_col = """\
row : array_like
    Sum of table entries in each row.
col : array_like
    Sum of table entries in each column."""

# Parameter entry for the table argument `x`.
_ctab_doc_x = """\
x : array-like
    Two-dimensional table of non-negative integers, or a
    multi-dimensional array with the last two dimensions
    corresponding with the tables."""

# Note on the requirements the marginal vectors have to meet.
_ctab_doc_row_col_note = """\
The row and column vectors must be one-dimensional, not empty,
and each sum up to the same value. They cannot contain negative
or noninteger entries."""

# Complete "Parameters" section built around the row/col entries.
_ctab_doc_mean_params = f"""
Parameters
----------
{_ctab_doc_row_col}"""

# Replacement note used in the frozen variant of the docstrings.
_ctab_doc_row_col_note_frozen = """\
See class definition for a detailed description of parameters."""

# Substitutions for the docstrings that take `row` and `col` explicitly.
_ctab_docdict = {
    "_doc_random_state": _doc_random_state,
    "_doc_row_col": _ctab_doc_row_col,
    "_doc_x": _ctab_doc_x,
    "_doc_mean_params": _ctab_doc_mean_params,
    "_doc_row_col_note": _ctab_doc_row_col_note,
}

# Frozen variant: the parameter entries are blanked out and the note is
# replaced by a pointer to the class definition.
_ctab_docdict_frozen = _ctab_docdict.copy()
_ctab_docdict_frozen.update({
    "_doc_row_col": "",
    "_doc_mean_params": "",
    "_doc_row_col_note": _ctab_doc_row_col_note_frozen,
})
  5265. def _docfill(obj, docdict, template=None):
  5266. obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict)
# Set frozen generator docstrings from corresponding docstrings in
# random_table and fill in default strings in class docstrings
_docfill(random_table_gen, _ctab_docdict)
for name in ['logpmf', 'pmf', 'mean', 'rvs']:
    method = random_table_gen.__dict__[name]
    method_frozen = random_table_frozen.__dict__[name]
    # The frozen docstring is rendered from the still unformatted template,
    # so it must be filled in before the template itself is overwritten.
    _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__)
    _docfill(method, _ctab_docdict)
  5275. class uniform_direction_gen(multi_rv_generic):
  5276. r"""A vector-valued uniform direction.
  5277. Return a random direction (unit vector). The `dim` keyword specifies
  5278. the dimensionality of the space.
  5279. Methods
  5280. -------
  5281. rvs(dim=None, size=1, random_state=None)
  5282. Draw random directions.
  5283. Parameters
  5284. ----------
  5285. dim : scalar
  5286. Dimension of directions.
  5287. seed : {None, int, `numpy.random.Generator`,
  5288. `numpy.random.RandomState`}, optional
  5289. Used for drawing random variates.
  5290. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  5291. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  5292. with seed.
  5293. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  5294. then that object is used.
  5295. Default is `None`.
  5296. Notes
  5297. -----
  5298. This distribution generates unit vectors uniformly distributed on
  5299. the surface of a hypersphere. These can be interpreted as random
  5300. directions.
  5301. For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2`
  5302. will be sampled.
  5303. References
  5304. ----------
  5305. .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a
  5306. Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
  5307. Examples
  5308. --------
  5309. >>> import numpy as np
  5310. >>> from scipy.stats import uniform_direction
  5311. >>> x = uniform_direction.rvs(3)
  5312. >>> np.linalg.norm(x)
  5313. 1.
  5314. This generates one random direction, a vector on the surface of
  5315. :math:`S^2`.
  5316. Alternatively, the object may be called (as a function) to return a frozen
  5317. distribution with fixed `dim` parameter. Here,
  5318. we create a `uniform_direction` with ``dim=3`` and draw 5 observations.
  5319. The samples are then arranged in an array of shape 5x3.
  5320. >>> rng = np.random.default_rng()
  5321. >>> uniform_sphere_dist = uniform_direction(3)
  5322. >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng)
  5323. >>> unit_vectors
  5324. array([[ 0.56688642, -0.1332634 , -0.81294566],
  5325. [-0.427126 , -0.74779278, 0.50830044],
  5326. [ 0.3793989 , 0.92346629, 0.05715323],
  5327. [ 0.36428383, -0.92449076, -0.11231259],
  5328. [-0.27733285, 0.94410968, -0.17816678]])
  5329. """
  5330. def __init__(self, seed=None):
  5331. super().__init__(seed)
  5332. self.__doc__ = doccer.docformat(self.__doc__)
  5333. def __call__(self, dim=None, seed=None):
  5334. """Create a frozen n-dimensional uniform direction distribution.
  5335. See `uniform_direction` for more information.
  5336. """
  5337. return uniform_direction_frozen(dim, seed=seed)
  5338. def _process_parameters(self, dim):
  5339. """Dimension N must be specified; it cannot be inferred."""
  5340. if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim):
  5341. raise ValueError("Dimension of vector must be specified, "
  5342. "and must be an integer greater than 0.")
  5343. return int(dim)
  5344. def rvs(self, dim, size=None, random_state=None):
  5345. """Draw random samples from S(N-1).
  5346. Parameters
  5347. ----------
  5348. dim : integer
  5349. Dimension of space (N).
  5350. size : int or tuple of ints, optional
  5351. Given a shape of, for example, (m,n,k), m*n*k samples are
  5352. generated, and packed in an m-by-n-by-k arrangement.
  5353. Because each sample is N-dimensional, the output shape
  5354. is (m,n,k,N). If no shape is specified, a single (N-D)
  5355. sample is returned.
  5356. random_state : {None, int, `numpy.random.Generator`,
  5357. `numpy.random.RandomState`}, optional
  5358. Pseudorandom number generator state used to generate resamples.
  5359. If `random_state` is ``None`` (or `np.random`), the
  5360. `numpy.random.RandomState` singleton is used.
  5361. If `random_state` is an int, a new ``RandomState`` instance is
  5362. used, seeded with `random_state`.
  5363. If `random_state` is already a ``Generator`` or ``RandomState``
  5364. instance then that instance is used.
  5365. Returns
  5366. -------
  5367. rvs : ndarray
  5368. Random direction vectors
  5369. """
  5370. random_state = self._get_random_state(random_state)
  5371. if size is None:
  5372. size = np.array([], dtype=int)
  5373. size = np.atleast_1d(size)
  5374. dim = self._process_parameters(dim)
  5375. samples = _sample_uniform_direction(dim, size, random_state)
  5376. return samples
# Module-level instance of the generator class; `rvs` takes the dimension
# `dim` explicitly on every call.
uniform_direction = uniform_direction_gen()
  5378. class uniform_direction_frozen(multi_rv_frozen):
  5379. def __init__(self, dim=None, seed=None):
  5380. """Create a frozen n-dimensional uniform direction distribution.
  5381. Parameters
  5382. ----------
  5383. dim : int
  5384. Dimension of matrices
  5385. seed : {None, int, `numpy.random.Generator`,
  5386. `numpy.random.RandomState`}, optional
  5387. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  5388. singleton is used.
  5389. If `seed` is an int, a new ``RandomState`` instance is used,
  5390. seeded with `seed`.
  5391. If `seed` is already a ``Generator`` or ``RandomState`` instance
  5392. then that instance is used.
  5393. Examples
  5394. --------
  5395. >>> from scipy.stats import uniform_direction
  5396. >>> x = uniform_direction(3)
  5397. >>> x.rvs()
  5398. """
  5399. self._dist = uniform_direction_gen(seed)
  5400. self.dim = self._dist._process_parameters(dim)
  5401. def rvs(self, size=None, random_state=None):
  5402. return self._dist.rvs(self.dim, size, random_state)
  5403. def _sample_uniform_direction(dim, size, random_state):
  5404. """
  5405. Private method to generate uniform directions
  5406. Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a
  5407. Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
  5408. """
  5409. samples_shape = np.append(size, dim)
  5410. samples = random_state.standard_normal(samples_shape)
  5411. samples /= np.linalg.norm(samples, axis=-1, keepdims=True)
  5412. return samples
# Documentation fragments for the Dirichlet multinomial distribution; they
# are substituted for %(...)s placeholders through the two dictionaries
# defined below.

# Parameter entries for `alpha` and `n`.
_dirichlet_mn_doc_default_callparams = """\
alpha : array_like
    The concentration parameters. The number of entries along the last axis
    determines the dimensionality of the distribution. Each entry must be
    strictly positive.
n : int or array_like
    The number of trials. Each element must be a non-negative integer.
"""

# Frozen variant of the parameter entries: nothing is inserted.
_dirichlet_mn_doc_frozen_callparams = ""

# Not referenced by either dictionary below.
_dirichlet_mn_doc_frozen_callparams_note = """\
See class definition for a detailed description of parameters."""

# Substitutions that include the parameter entries.
dirichlet_mn_docdict_params = {
    '_dirichlet_mn_doc_default_callparams': _dirichlet_mn_doc_default_callparams,
    '_doc_random_state': _doc_random_state
}
# Substitutions with the parameter entries blanked out.
dirichlet_mn_docdict_noparams = {
    '_dirichlet_mn_doc_default_callparams': _dirichlet_mn_doc_frozen_callparams,
    '_doc_random_state': _doc_random_state
}
  5432. def _dirichlet_multinomial_check_parameters(alpha, n, x=None):
  5433. alpha = np.asarray(alpha)
  5434. n = np.asarray(n)
  5435. if x is not None:
  5436. # Ensure that `x` and `alpha` are arrays. If the shapes are
  5437. # incompatible, NumPy will raise an appropriate error.
  5438. try:
  5439. x, alpha = np.broadcast_arrays(x, alpha)
  5440. except ValueError as e:
  5441. msg = "`x` and `alpha` must be broadcastable."
  5442. raise ValueError(msg) from e
  5443. x_int = np.floor(x)
  5444. if np.any(x < 0) or np.any(x != x_int):
  5445. raise ValueError("`x` must contain only non-negative integers.")
  5446. x = x_int
  5447. if np.any(alpha <= 0):
  5448. raise ValueError("`alpha` must contain only positive values.")
  5449. n_int = np.floor(n)
  5450. if np.any(n < 0) or np.any(n != n_int):
  5451. raise ValueError("`n` must be a non-negative integer.")
  5452. n = n_int
  5453. sum_alpha = np.sum(alpha, axis=-1)
  5454. sum_alpha, n = np.broadcast_arrays(sum_alpha, n)
  5455. return (alpha, sum_alpha, n) if x is None else (alpha, sum_alpha, n, x)
  5456. class dirichlet_multinomial_gen(multi_rv_generic):
  5457. r"""A Dirichlet multinomial random variable.
  5458. The Dirichlet multinomial distribution is a compound probability
  5459. distribution: it is the multinomial distribution with number of trials
  5460. `n` and class probabilities ``p`` randomly sampled from a Dirichlet
  5461. distribution with concentration parameters ``alpha``.
  5462. Methods
  5463. -------
  5464. logpmf(x, alpha, n):
  5465. Log of the probability mass function.
  5466. pmf(x, alpha, n):
  5467. Probability mass function.
  5468. mean(alpha, n):
  5469. Mean of the Dirichlet multinomial distribution.
  5470. var(alpha, n):
  5471. Variance of the Dirichlet multinomial distribution.
  5472. cov(alpha, n):
  5473. The covariance of the Dirichlet multinomial distribution.
  5474. Parameters
  5475. ----------
  5476. %(_dirichlet_mn_doc_default_callparams)s
  5477. %(_doc_random_state)s
  5478. See Also
  5479. --------
  5480. scipy.stats.dirichlet : The dirichlet distribution.
  5481. scipy.stats.multinomial : The multinomial distribution.
  5482. References
  5483. ----------
  5484. .. [1] Dirichlet-multinomial distribution, Wikipedia,
  5485. https://www.wikipedia.org/wiki/Dirichlet-multinomial_distribution
  5486. Examples
  5487. --------
  5488. >>> from scipy.stats import dirichlet_multinomial
  5489. Get the PMF
  5490. >>> n = 6 # number of trials
  5491. >>> alpha = [3, 4, 5] # concentration parameters
  5492. >>> x = [1, 2, 3] # counts
  5493. >>> dirichlet_multinomial.pmf(x, alpha, n)
  5494. 0.08484162895927604
  5495. If the sum of category counts does not equal the number of trials,
  5496. the probability mass is zero.
  5497. >>> dirichlet_multinomial.pmf(x, alpha, n=7)
  5498. 0.0
  5499. Get the log of the PMF
  5500. >>> dirichlet_multinomial.logpmf(x, alpha, n)
  5501. -2.4669689491013327
  5502. Get the mean
  5503. >>> dirichlet_multinomial.mean(alpha, n)
  5504. array([1.5, 2. , 2.5])
  5505. Get the variance
  5506. >>> dirichlet_multinomial.var(alpha, n)
  5507. array([1.55769231, 1.84615385, 2.01923077])
  5508. Get the covariance
  5509. >>> dirichlet_multinomial.cov(alpha, n)
  5510. array([[ 1.55769231, -0.69230769, -0.86538462],
  5511. [-0.69230769, 1.84615385, -1.15384615],
  5512. [-0.86538462, -1.15384615, 2.01923077]])
  5513. Alternatively, the object may be called (as a function) to fix the
  5514. `alpha` and `n` parameters, returning a "frozen" Dirichlet multinomial
  5515. random variable.
  5516. >>> dm = dirichlet_multinomial(alpha, n)
  5517. >>> dm.pmf(x)
  5518. 0.08484162895927579
  5519. All methods are fully vectorized. Each element of `x` and `alpha` is
  5520. a vector (along the last axis), each element of `n` is an
  5521. integer (scalar), and the result is computed element-wise.
  5522. >>> x = [[1, 2, 3], [4, 5, 6]]
  5523. >>> alpha = [[1, 2, 3], [4, 5, 6]]
  5524. >>> n = [6, 15]
  5525. >>> dirichlet_multinomial.pmf(x, alpha, n)
  5526. array([0.06493506, 0.02626937])
  5527. >>> dirichlet_multinomial.cov(alpha, n).shape # both covariance matrices
  5528. (2, 3, 3)
  5529. Broadcasting according to standard NumPy conventions is supported. Here,
  5530. we have four sets of concentration parameters (each a two element vector)
  5531. for each of three numbers of trials (each a scalar).
  5532. >>> alpha = [[3, 4], [4, 5], [5, 6], [6, 7]]
  5533. >>> n = [[6], [7], [8]]
  5534. >>> dirichlet_multinomial.mean(alpha, n).shape
  5535. (3, 4, 2)
  5536. """
  5537. def __init__(self, seed=None):
  5538. super().__init__(seed)
  5539. self.__doc__ = doccer.docformat(self.__doc__,
  5540. dirichlet_mn_docdict_params)
  5541. def __call__(self, alpha, n, seed=None):
  5542. return dirichlet_multinomial_frozen(alpha, n, seed=seed)
  5543. def logpmf(self, x, alpha, n):
  5544. """The log of the probability mass function.
  5545. Parameters
  5546. ----------
  5547. x: ndarray
  5548. Category counts (non-negative integers). Must be broadcastable
  5549. with shape parameter ``alpha``. If multidimensional, the last axis
  5550. must correspond with the categories.
  5551. %(_dirichlet_mn_doc_default_callparams)s
  5552. Returns
  5553. -------
  5554. out: ndarray or scalar
  5555. Log of the probability mass function.
  5556. """
  5557. a, Sa, n, x = _dirichlet_multinomial_check_parameters(alpha, n, x)
  5558. out = np.asarray(loggamma(Sa) + loggamma(n + 1) - loggamma(n + Sa))
  5559. out += (loggamma(x + a) - (loggamma(a) + loggamma(x + 1))).sum(axis=-1)
  5560. np.place(out, n != x.sum(axis=-1), -np.inf)
  5561. return out[()]
  5562. def pmf(self, x, alpha, n):
  5563. """Probability mass function for a Dirichlet multinomial distribution.
  5564. Parameters
  5565. ----------
  5566. x: ndarray
  5567. Category counts (non-negative integers). Must be broadcastable
  5568. with shape parameter ``alpha``. If multidimensional, the last axis
  5569. must correspond with the categories.
  5570. %(_dirichlet_mn_doc_default_callparams)s
  5571. Returns
  5572. -------
  5573. out: ndarray or scalar
  5574. Probability mass function.
  5575. """
  5576. return np.exp(self.logpmf(x, alpha, n))
  5577. def mean(self, alpha, n):
  5578. """Mean of a Dirichlet multinomial distribution.
  5579. Parameters
  5580. ----------
  5581. %(_dirichlet_mn_doc_default_callparams)s
  5582. Returns
  5583. -------
  5584. out: ndarray
  5585. Mean of a Dirichlet multinomial distribution.
  5586. """
  5587. a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
  5588. n, Sa = n[..., np.newaxis], Sa[..., np.newaxis]
  5589. return n * a / Sa
  5590. def var(self, alpha, n):
  5591. """The variance of the Dirichlet multinomial distribution.
  5592. Parameters
  5593. ----------
  5594. %(_dirichlet_mn_doc_default_callparams)s
  5595. Returns
  5596. -------
  5597. out: array_like
  5598. The variances of the components of the distribution. This is
  5599. the diagonal of the covariance matrix of the distribution.
  5600. """
  5601. a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
  5602. n, Sa = n[..., np.newaxis], Sa[..., np.newaxis]
  5603. return n * a / Sa * (1 - a/Sa) * (n + Sa) / (1 + Sa)
  5604. def cov(self, alpha, n):
  5605. """Covariance matrix of a Dirichlet multinomial distribution.
  5606. Parameters
  5607. ----------
  5608. %(_dirichlet_mn_doc_default_callparams)s
  5609. Returns
  5610. -------
  5611. out : array_like
  5612. The covariance matrix of the distribution.
  5613. """
  5614. a, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
  5615. var = dirichlet_multinomial.var(a, n)
  5616. n, Sa = n[..., np.newaxis, np.newaxis], Sa[..., np.newaxis, np.newaxis]
  5617. aiaj = a[..., :, np.newaxis] * a[..., np.newaxis, :]
  5618. cov = -n * aiaj / Sa ** 2 * (n + Sa) / (1 + Sa)
  5619. ii = np.arange(cov.shape[-1])
  5620. cov[..., ii, ii] = var
  5621. return cov
# Module-level instance of the generic distribution, created with the default
# seed; the class docstring examples use it as `dirichlet_multinomial`.
dirichlet_multinomial = dirichlet_multinomial_gen()
  5623. class dirichlet_multinomial_frozen(multi_rv_frozen):
  5624. def __init__(self, alpha, n, seed=None):
  5625. alpha, Sa, n = _dirichlet_multinomial_check_parameters(alpha, n)
  5626. self.alpha = alpha
  5627. self.n = n
  5628. self._dist = dirichlet_multinomial_gen(seed)
  5629. def logpmf(self, x):
  5630. return self._dist.logpmf(x, self.alpha, self.n)
  5631. def pmf(self, x):
  5632. return self._dist.pmf(x, self.alpha, self.n)
  5633. def mean(self):
  5634. return self._dist.mean(self.alpha, self.n)
  5635. def var(self):
  5636. return self._dist.var(self.alpha, self.n)
  5637. def cov(self):
  5638. return self._dist.cov(self.alpha, self.n)
  5639. # Set frozen generator docstrings from corresponding docstrings in
  5640. # dirichlet_multinomial and fill in default strings in class docstrings.
  5641. for name in ['logpmf', 'pmf', 'mean', 'var', 'cov']:
  5642. method = dirichlet_multinomial_gen.__dict__[name]
  5643. method_frozen = dirichlet_multinomial_frozen.__dict__[name]
  5644. method_frozen.__doc__ = doccer.docformat(
  5645. method.__doc__, dirichlet_mn_docdict_noparams)
  5646. method.__doc__ = doccer.docformat(method.__doc__,
  5647. dirichlet_mn_docdict_params)
  5648. class vonmises_fisher_gen(multi_rv_generic):
  5649. r"""A von Mises-Fisher variable.
  5650. The `mu` keyword specifies the mean direction vector. The `kappa` keyword
  5651. specifies the concentration parameter.
  5652. Methods
  5653. -------
  5654. pdf(x, mu=None, kappa=1)
  5655. Probability density function.
  5656. logpdf(x, mu=None, kappa=1)
  5657. Log of the probability density function.
  5658. rvs(mu=None, kappa=1, size=1, random_state=None)
  5659. Draw random samples from a von Mises-Fisher distribution.
  5660. entropy(mu=None, kappa=1)
  5661. Compute the differential entropy of the von Mises-Fisher distribution.
  5662. fit(data)
  5663. Fit a von Mises-Fisher distribution to data.
  5664. Parameters
  5665. ----------
  5666. mu : array_like
  5667. Mean direction of the distribution. Must be a one-dimensional unit
  5668. vector of norm 1.
  5669. kappa : float
  5670. Concentration parameter. Must be positive.
  5671. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  5672. Used for drawing random variates.
  5673. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  5674. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  5675. with seed.
  5676. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  5677. then that object is used.
  5678. Default is `None`.
  5679. See Also
  5680. --------
  5681. scipy.stats.vonmises : Von-Mises Fisher distribution in 2D on a circle
  5682. uniform_direction : uniform distribution on the surface of a hypersphere
  5683. Notes
  5684. -----
  5685. The von Mises-Fisher distribution is a directional distribution on the
  5686. surface of the unit hypersphere. The probability density
  5687. function of a unit vector :math:`\mathbf{x}` is
  5688. .. math::
  5689. f(\mathbf{x}) = \frac{\kappa^{d/2-1}}{(2\pi)^{d/2}I_{d/2-1}(\kappa)}
  5690. \exp\left(\kappa \mathbf{\mu}^T\mathbf{x}\right),
  5691. where :math:`\mathbf{\mu}` is the mean direction, :math:`\kappa` the
  5692. concentration parameter, :math:`d` the dimension and :math:`I` the
  5693. modified Bessel function of the first kind. As :math:`\mu` represents
  5694. a direction, it must be a unit vector or in other words, a point
  5695. on the hypersphere: :math:`\mathbf{\mu}\in S^{d-1}`. :math:`\kappa` is a
  5696. concentration parameter, which means that it must be positive
  5697. (:math:`\kappa>0`) and that the distribution becomes more narrow with
  5698. increasing :math:`\kappa`. In that sense, the reciprocal value
  5699. :math:`1/\kappa` resembles the variance parameter of the normal
  5700. distribution.
  5701. The von Mises-Fisher distribution often serves as an analogue of the
  5702. normal distribution on the sphere. Intuitively, for unit vectors, a
  5703. useful distance measure is given by the angle :math:`\alpha` between
  5704. them. This is exactly what the scalar product
  5705. :math:`\mathbf{\mu}^T\mathbf{x}=\cos(\alpha)` in the
  5706. von Mises-Fisher probability density function describes: the angle
  5707. between the mean direction :math:`\mathbf{\mu}` and the vector
  5708. :math:`\mathbf{x}`. The larger the angle between them, the smaller the
  5709. probability to observe :math:`\mathbf{x}` for this particular mean
  5710. direction :math:`\mathbf{\mu}`.
  5711. In dimensions 2 and 3, specialized algorithms are used for fast sampling
  5712. [2]_, [3]_. For dimensions of 4 or higher the rejection sampling algorithm
  5713. described in [4]_ is utilized. This implementation is partially based on
  5714. the geomstats package [5]_, [6]_.
  5715. .. versionadded:: 1.11
  5716. References
  5717. ----------
  5718. .. [1] Von Mises-Fisher distribution, Wikipedia,
  5719. https://en.wikipedia.org/wiki/Von_Mises%E2%80%93Fisher_distribution
  5720. .. [2] Mardia, K., and Jupp, P. Directional statistics. Wiley, 2000.
  5721. .. [3] J. Wenzel. Numerically stable sampling of the von Mises Fisher
  5722. distribution on S2.
  5723. https://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf
  5724. .. [4] Wood, A. Simulation of the von mises fisher distribution.
  5725. Communications in statistics-simulation and computation 23,
  5726. 1 (1994), 157-164. https://doi.org/10.1080/03610919408813161
  5727. .. [5] geomstats, Github. MIT License. Accessed: 06.01.2023.
  5728. https://github.com/geomstats/geomstats
  5729. .. [6] Miolane, N. et al. Geomstats: A Python Package for Riemannian
  5730. Geometry in Machine Learning. Journal of Machine Learning Research
  5731. 21 (2020). http://jmlr.org/papers/v21/19-027.html
  5732. Examples
  5733. --------
  5734. **Visualization of the probability density**
  5735. Plot the probability density in three dimensions for increasing
  5736. concentration parameter. The density is calculated by the ``pdf``
  5737. method.
  5738. >>> import numpy as np
  5739. >>> import matplotlib.pyplot as plt
  5740. >>> from scipy.stats import vonmises_fisher
  5741. >>> from matplotlib.colors import Normalize
  5742. >>> n_grid = 100
  5743. >>> u = np.linspace(0, np.pi, n_grid)
  5744. >>> v = np.linspace(0, 2 * np.pi, n_grid)
  5745. >>> u_grid, v_grid = np.meshgrid(u, v)
  5746. >>> vertices = np.stack([np.cos(v_grid) * np.sin(u_grid),
  5747. ... np.sin(v_grid) * np.sin(u_grid),
  5748. ... np.cos(u_grid)],
  5749. ... axis=2)
  5750. >>> x = np.outer(np.cos(v), np.sin(u))
  5751. >>> y = np.outer(np.sin(v), np.sin(u))
  5752. >>> z = np.outer(np.ones_like(u), np.cos(u))
  5753. >>> def plot_vmf_density(ax, x, y, z, vertices, mu, kappa):
  5754. ... vmf = vonmises_fisher(mu, kappa)
  5755. ... pdf_values = vmf.pdf(vertices)
  5756. ... pdfnorm = Normalize(vmin=pdf_values.min(), vmax=pdf_values.max())
  5757. ... ax.plot_surface(x, y, z, rstride=1, cstride=1,
  5758. ... facecolors=plt.cm.viridis(pdfnorm(pdf_values)),
  5759. ... linewidth=0)
  5760. ... ax.set_aspect('equal')
  5761. ... ax.view_init(azim=-130, elev=0)
  5762. ... ax.axis('off')
  5763. ... ax.set_title(rf"$\kappa={kappa}$")
  5764. >>> fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(9, 4),
  5765. ... subplot_kw={"projection": "3d"})
  5766. >>> left, middle, right = axes
  5767. >>> mu = np.array([-np.sqrt(0.5), -np.sqrt(0.5), 0])
  5768. >>> plot_vmf_density(left, x, y, z, vertices, mu, 5)
  5769. >>> plot_vmf_density(middle, x, y, z, vertices, mu, 20)
  5770. >>> plot_vmf_density(right, x, y, z, vertices, mu, 100)
  5771. >>> plt.subplots_adjust(top=1, bottom=0.0, left=0.0, right=1.0, wspace=0.)
  5772. >>> plt.show()
  5773. As we increase the concentration parameter, the points are getting more
  5774. clustered together around the mean direction.
  5775. **Sampling**
  5776. Draw 5 samples from the distribution using the ``rvs`` method resulting
  5777. in a 5x3 array.
  5778. >>> rng = np.random.default_rng()
  5779. >>> mu = np.array([0, 0, 1])
  5780. >>> samples = vonmises_fisher(mu, 20).rvs(5, random_state=rng)
  5781. >>> samples
  5782. array([[ 0.3884594 , -0.32482588, 0.86231516],
  5783. [ 0.00611366, -0.09878289, 0.99509023],
  5784. [-0.04154772, -0.01637135, 0.99900239],
  5785. [-0.14613735, 0.12553507, 0.98126695],
  5786. [-0.04429884, -0.23474054, 0.97104814]])
  5787. These samples are unit vectors on the sphere :math:`S^2`. To verify,
  5788. let us calculate their euclidean norms:
  5789. >>> np.linalg.norm(samples, axis=1)
  5790. array([1., 1., 1., 1., 1.])
  5791. Plot 20 observations drawn from the von Mises-Fisher distribution for
  5792. increasing concentration parameter :math:`\kappa`. The red dot highlights
  5793. the mean direction :math:`\mu`.
  5794. >>> def plot_vmf_samples(ax, x, y, z, mu, kappa):
  5795. ... vmf = vonmises_fisher(mu, kappa)
  5796. ... samples = vmf.rvs(20)
  5797. ... ax.plot_surface(x, y, z, rstride=1, cstride=1, linewidth=0,
  5798. ... alpha=0.2)
  5799. ... ax.scatter(samples[:, 0], samples[:, 1], samples[:, 2], c='k', s=5)
  5800. ... ax.scatter(mu[0], mu[1], mu[2], c='r', s=30)
  5801. ... ax.set_aspect('equal')
  5802. ... ax.view_init(azim=-130, elev=0)
  5803. ... ax.axis('off')
  5804. ... ax.set_title(rf"$\kappa={kappa}$")
  5805. >>> mu = np.array([-np.sqrt(0.5), -np.sqrt(0.5), 0])
  5806. >>> fig, axes = plt.subplots(nrows=1, ncols=3,
  5807. ... subplot_kw={"projection": "3d"},
  5808. ... figsize=(9, 4))
  5809. >>> left, middle, right = axes
  5810. >>> plot_vmf_samples(left, x, y, z, mu, 5)
  5811. >>> plot_vmf_samples(middle, x, y, z, mu, 20)
  5812. >>> plot_vmf_samples(right, x, y, z, mu, 100)
  5813. >>> plt.subplots_adjust(top=1, bottom=0.0, left=0.0,
  5814. ... right=1.0, wspace=0.)
  5815. >>> plt.show()
  5816. The plots show that with increasing concentration :math:`\kappa` the
  5817. resulting samples are centered more closely around the mean direction.
  5818. **Fitting the distribution parameters**
  5819. The distribution can be fitted to data using the ``fit`` method returning
  5820. the estimated parameters. As a toy example let's fit the distribution to
  5821. samples drawn from a known von Mises-Fisher distribution.
  5822. >>> mu, kappa = np.array([0, 0, 1]), 20
  5823. >>> samples = vonmises_fisher(mu, kappa).rvs(1000, random_state=rng)
  5824. >>> mu_fit, kappa_fit = vonmises_fisher.fit(samples)
  5825. >>> mu_fit, kappa_fit
  5826. (array([0.01126519, 0.01044501, 0.99988199]), 19.306398751730995)
  5827. We see that the estimated parameters `mu_fit` and `kappa_fit` are
  5828. very close to the ground truth parameters.
  5829. """
  5830. def __init__(self, seed=None):
  5831. super().__init__(seed)
  5832. def __call__(self, mu=None, kappa=1, seed=None):
  5833. """Create a frozen von Mises-Fisher distribution.
  5834. See `vonmises_fisher_frozen` for more information.
  5835. """
  5836. return vonmises_fisher_frozen(mu, kappa, seed=seed)
  5837. def _process_parameters(self, mu, kappa):
  5838. """
  5839. Infer dimensionality from mu and ensure that mu is a one-dimensional
  5840. unit vector and kappa positive.
  5841. """
  5842. mu = np.asarray(mu)
  5843. if mu.ndim > 1:
  5844. raise ValueError("'mu' must have one-dimensional shape.")
  5845. if not np.allclose(np.linalg.norm(mu), 1.):
  5846. raise ValueError("'mu' must be a unit vector of norm 1.")
  5847. if not mu.size > 1:
  5848. raise ValueError("'mu' must have at least two entries.")
  5849. kappa_error_msg = "'kappa' must be a positive scalar."
  5850. if not np.isscalar(kappa) or kappa < 0:
  5851. raise ValueError(kappa_error_msg)
  5852. if float(kappa) == 0.:
  5853. raise ValueError("For 'kappa=0' the von Mises-Fisher distribution "
  5854. "becomes the uniform distribution on the sphere "
  5855. "surface. Consider using "
  5856. "'scipy.stats.uniform_direction' instead.")
  5857. dim = mu.size
  5858. return dim, mu, kappa
  5859. def _check_data_vs_dist(self, x, dim):
  5860. if x.shape[-1] != dim:
  5861. raise ValueError("The dimensionality of the last axis of 'x' must "
  5862. "match the dimensionality of the "
  5863. "von Mises Fisher distribution.")
  5864. if not np.allclose(np.linalg.norm(x, axis=-1), 1.):
  5865. msg = "'x' must be unit vectors of norm 1 along last dimension."
  5866. raise ValueError(msg)
  5867. def _log_norm_factor(self, dim, kappa):
  5868. # normalization factor is given by
  5869. # c = kappa**(dim/2-1)/((2*pi)**(dim/2)*I[dim/2-1](kappa))
  5870. # = kappa**(dim/2-1)*exp(-kappa) /
  5871. # ((2*pi)**(dim/2)*I[dim/2-1](kappa)*exp(-kappa)
  5872. # = kappa**(dim/2-1)*exp(-kappa) /
  5873. # ((2*pi)**(dim/2)*ive[dim/2-1](kappa)
  5874. # Then the log is given by
  5875. # log c = 1/2*(dim -1)*log(kappa) - kappa - -1/2*dim*ln(2*pi) -
  5876. # ive[dim/2-1](kappa)
  5877. halfdim = 0.5 * dim
  5878. return (0.5 * (dim - 2)*np.log(kappa) - halfdim * _LOG_2PI -
  5879. np.log(ive(halfdim - 1, kappa)) - kappa)
  5880. def _logpdf(self, x, dim, mu, kappa):
  5881. """Log of the von Mises-Fisher probability density function.
  5882. As this function does no argument checking, it should not be
  5883. called directly; use 'logpdf' instead.
  5884. """
  5885. x = np.asarray(x)
  5886. self._check_data_vs_dist(x, dim)
  5887. dotproducts = np.einsum('i,...i->...', mu, x)
  5888. return self._log_norm_factor(dim, kappa) + kappa * dotproducts
  5889. def logpdf(self, x, mu=None, kappa=1):
  5890. """Log of the von Mises-Fisher probability density function.
  5891. Parameters
  5892. ----------
  5893. x : array_like
  5894. Points at which to evaluate the log of the probability
  5895. density function. The last axis of `x` must correspond
  5896. to unit vectors of the same dimensionality as the distribution.
  5897. mu : array_like, default: None
  5898. Mean direction of the distribution. Must be a one-dimensional unit
  5899. vector of norm 1.
  5900. kappa : float, default: 1
  5901. Concentration parameter. Must be positive.
  5902. Returns
  5903. -------
  5904. logpdf : ndarray or scalar
  5905. Log of the probability density function evaluated at `x`.
  5906. """
  5907. dim, mu, kappa = self._process_parameters(mu, kappa)
  5908. return self._logpdf(x, dim, mu, kappa)
  5909. def pdf(self, x, mu=None, kappa=1):
  5910. """Von Mises-Fisher probability density function.
  5911. Parameters
  5912. ----------
  5913. x : array_like
  5914. Points at which to evaluate the probability
  5915. density function. The last axis of `x` must correspond
  5916. to unit vectors of the same dimensionality as the distribution.
  5917. mu : array_like
  5918. Mean direction of the distribution. Must be a one-dimensional unit
  5919. vector of norm 1.
  5920. kappa : float
  5921. Concentration parameter. Must be positive.
  5922. Returns
  5923. -------
  5924. pdf : ndarray or scalar
  5925. Probability density function evaluated at `x`.
  5926. """
  5927. dim, mu, kappa = self._process_parameters(mu, kappa)
  5928. return np.exp(self._logpdf(x, dim, mu, kappa))
  5929. def _rvs_2d(self, mu, kappa, size, random_state):
  5930. """
  5931. In 2D, the von Mises-Fisher distribution reduces to the
  5932. von Mises distribution which can be efficiently sampled by numpy.
  5933. This method is much faster than the general rejection
  5934. sampling based algorithm.
  5935. """
  5936. mean_angle = np.arctan2(mu[1], mu[0])
  5937. angle_samples = random_state.vonmises(mean_angle, kappa, size=size)
  5938. samples = np.stack([np.cos(angle_samples), np.sin(angle_samples)],
  5939. axis=-1)
  5940. return samples
  5941. def _rvs_3d(self, kappa, size, random_state):
  5942. """
  5943. Generate samples from a von Mises-Fisher distribution
  5944. with mu = [1, 0, 0] and kappa. Samples then have to be
  5945. rotated towards the desired mean direction mu.
  5946. This method is much faster than the general rejection
  5947. sampling based algorithm.
  5948. Reference: https://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf
  5949. """
  5950. if size is None:
  5951. sample_size = 1
  5952. else:
  5953. sample_size = size
  5954. # compute x coordinate acc. to equation from section 3.1
  5955. x = random_state.random(sample_size)
  5956. x = 1. + np.log(x + (1. - x) * np.exp(-2 * kappa))/kappa
  5957. # (y, z) are random 2D vectors that only have to be
  5958. # normalized accordingly. Then (x, y z) follow a VMF distribution
  5959. temp = np.sqrt(1. - np.square(x))
  5960. uniformcircle = _sample_uniform_direction(2, sample_size, random_state)
  5961. samples = np.stack([x, temp * uniformcircle[..., 0],
  5962. temp * uniformcircle[..., 1]],
  5963. axis=-1)
  5964. if size is None:
  5965. samples = np.squeeze(samples)
  5966. return samples
  5967. def _rejection_sampling(self, dim, kappa, size, random_state):
  5968. """
  5969. Generate samples from an n-dimensional von Mises-Fisher distribution
  5970. with mu = [1, 0, ..., 0] and kappa via rejection sampling.
  5971. Samples then have to be rotated towards the desired mean direction mu.
  5972. Reference: https://doi.org/10.1080/03610919408813161
  5973. """
  5974. dim_minus_one = dim - 1
  5975. # calculate number of requested samples
  5976. if size is not None:
  5977. if not np.iterable(size):
  5978. size = (size, )
  5979. n_samples = math.prod(size)
  5980. else:
  5981. n_samples = 1
  5982. # calculate envelope for rejection sampler (eq. 4)
  5983. sqrt = np.sqrt(4 * kappa ** 2. + dim_minus_one ** 2)
  5984. envelop_param = (-2 * kappa + sqrt) / dim_minus_one
  5985. if envelop_param == 0:
  5986. # the regular formula suffers from loss of precision for high
  5987. # kappa. This can only be detected by checking for 0 here.
  5988. # Workaround: expansion for sqrt variable
  5989. # https://www.wolframalpha.com/input?i=sqrt%284*x%5E2%2Bd%5E2%29
  5990. # e = (-2 * k + sqrt(k**2 + d**2)) / d
  5991. # ~ (-2 * k + 2 * k + d**2/(4 * k) - d**4/(64 * k**3)) / d
  5992. # = d/(4 * k) - d**3/(64 * k**3)
  5993. envelop_param = (dim_minus_one/4 * kappa**-1.
  5994. - dim_minus_one**3/64 * kappa**-3.)
  5995. # reference step 0
  5996. node = (1. - envelop_param) / (1. + envelop_param)
  5997. # t = ln(1 - ((1-x)/(1+x))**2)
  5998. # = ln(4 * x / (1+x)**2)
  5999. # = ln(4) + ln(x) - 2*log1p(x)
  6000. correction = (kappa * node + dim_minus_one
  6001. * (np.log(4) + np.log(envelop_param)
  6002. - 2 * np.log1p(envelop_param)))
  6003. n_accepted = 0
  6004. x = np.zeros((n_samples, ))
  6005. halfdim = 0.5 * dim_minus_one
  6006. # main loop
  6007. while n_accepted < n_samples:
  6008. # generate candidates acc. to reference step 1
  6009. sym_beta = random_state.beta(halfdim, halfdim,
  6010. size=n_samples - n_accepted)
  6011. coord_x = (1 - (1 + envelop_param) * sym_beta) / (
  6012. 1 - (1 - envelop_param) * sym_beta)
  6013. # accept or reject: reference step 2
  6014. # reformulation for numerical stability:
  6015. # t = ln(1 - (1-x)/(1+x) * y)
  6016. # = ln((1 + x - y +x*y)/(1 +x))
  6017. accept_tol = random_state.random(n_samples - n_accepted)
  6018. criterion = (
  6019. kappa * coord_x
  6020. + dim_minus_one * (np.log((1 + envelop_param - coord_x
  6021. + coord_x * envelop_param) / (1 + envelop_param)))
  6022. - correction) > np.log(accept_tol)
  6023. accepted_iter = np.sum(criterion)
  6024. x[n_accepted:n_accepted + accepted_iter] = coord_x[criterion]
  6025. n_accepted += accepted_iter
  6026. # concatenate x and remaining coordinates: step 3
  6027. coord_rest = _sample_uniform_direction(dim_minus_one, n_accepted,
  6028. random_state)
  6029. coord_rest = np.einsum(
  6030. '...,...i->...i', np.sqrt(1 - x ** 2), coord_rest)
  6031. samples = np.concatenate([x[..., None], coord_rest], axis=1)
  6032. # reshape output to (size, dim)
  6033. if size is not None:
  6034. samples = samples.reshape(size + (dim, ))
  6035. else:
  6036. samples = np.squeeze(samples)
  6037. return samples
  6038. def _rotate_samples(self, samples, mu, dim):
  6039. """A QR decomposition is used to find the rotation that maps the
  6040. north pole (1, 0,...,0) to the vector mu. This rotation is then
  6041. applied to all samples.
  6042. Parameters
  6043. ----------
  6044. samples: array_like, shape = [..., n]
  6045. mu : array-like, shape=[n, ]
  6046. Point to parametrise the rotation.
  6047. Returns
  6048. -------
  6049. samples : rotated samples
  6050. """
  6051. base_point = np.zeros((dim, ))
  6052. base_point[0] = 1.
  6053. embedded = np.concatenate([mu[None, :], np.zeros((dim - 1, dim))])
  6054. rotmatrix, _ = np.linalg.qr(np.transpose(embedded))
  6055. if np.allclose(np.matmul(rotmatrix, base_point[:, None])[:, 0], mu):
  6056. rotsign = 1
  6057. else:
  6058. rotsign = -1
  6059. # apply rotation
  6060. samples = np.einsum('ij,...j->...i', rotmatrix, samples) * rotsign
  6061. return samples
  6062. def _rvs(self, dim, mu, kappa, size, random_state):
  6063. if dim == 2:
  6064. samples = self._rvs_2d(mu, kappa, size, random_state)
  6065. elif dim == 3:
  6066. samples = self._rvs_3d(kappa, size, random_state)
  6067. else:
  6068. samples = self._rejection_sampling(dim, kappa, size,
  6069. random_state)
  6070. if dim != 2:
  6071. samples = self._rotate_samples(samples, mu, dim)
  6072. return samples
  6073. def rvs(self, mu=None, kappa=1, size=1, random_state=None):
  6074. """Draw random samples from a von Mises-Fisher distribution.
  6075. Parameters
  6076. ----------
  6077. mu : array_like
  6078. Mean direction of the distribution. Must be a one-dimensional unit
  6079. vector of norm 1.
  6080. kappa : float
  6081. Concentration parameter. Must be positive.
  6082. size : int or tuple of ints, optional
  6083. Given a shape of, for example, (m,n,k), m*n*k samples are
  6084. generated, and packed in an m-by-n-by-k arrangement.
  6085. Because each sample is N-dimensional, the output shape
  6086. is (m,n,k,N). If no shape is specified, a single (N-D)
  6087. sample is returned.
  6088. random_state : {None, int, np.random.RandomState, np.random.Generator},
  6089. optional
  6090. Used for drawing random variates.
  6091. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  6092. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  6093. with seed.
  6094. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  6095. then that object is used.
  6096. Default is `None`.
  6097. Returns
  6098. -------
  6099. rvs : ndarray
  6100. Random variates of shape (`size`, `N`), where `N` is the
  6101. dimension of the distribution.
  6102. """
  6103. dim, mu, kappa = self._process_parameters(mu, kappa)
  6104. random_state = self._get_random_state(random_state)
  6105. samples = self._rvs(dim, mu, kappa, size, random_state)
  6106. return samples
  6107. def _entropy(self, dim, kappa):
  6108. halfdim = 0.5 * dim
  6109. return (-self._log_norm_factor(dim, kappa) - kappa *
  6110. ive(halfdim, kappa) / ive(halfdim - 1, kappa))
  6111. def entropy(self, mu=None, kappa=1):
  6112. """Compute the differential entropy of the von Mises-Fisher
  6113. distribution.
  6114. Parameters
  6115. ----------
  6116. mu : array_like, default: None
  6117. Mean direction of the distribution. Must be a one-dimensional unit
  6118. vector of norm 1.
  6119. kappa : float, default: 1
  6120. Concentration parameter. Must be positive.
  6121. Returns
  6122. -------
  6123. h : scalar
  6124. Entropy of the von Mises-Fisher distribution.
  6125. """
  6126. dim, _, kappa = self._process_parameters(mu, kappa)
  6127. return self._entropy(dim, kappa)
  6128. def fit(self, x):
  6129. """Fit the von Mises-Fisher distribution to data.
  6130. Parameters
  6131. ----------
  6132. x : array-like
  6133. Data the distribution is fitted to. Must be two dimensional.
  6134. The second axis of `x` must be unit vectors of norm 1 and
  6135. determine the dimensionality of the fitted
  6136. von Mises-Fisher distribution.
  6137. Returns
  6138. -------
  6139. mu : ndarray
  6140. Estimated mean direction.
  6141. kappa : float
  6142. Estimated concentration parameter.
  6143. """
  6144. # validate input data
  6145. x = np.asarray(x)
  6146. if x.ndim != 2:
  6147. raise ValueError("'x' must be two dimensional.")
  6148. if not np.allclose(np.linalg.norm(x, axis=-1), 1.):
  6149. msg = "'x' must be unit vectors of norm 1 along last dimension."
  6150. raise ValueError(msg)
  6151. dim = x.shape[-1]
  6152. # mu is simply the directional mean
  6153. dirstats = directional_stats(x)
  6154. mu = dirstats.mean_direction
  6155. r = dirstats.mean_resultant_length
  6156. # kappa is the solution to the equation:
  6157. # r = I[dim/2](kappa) / I[dim/2 -1](kappa)
  6158. # = I[dim/2](kappa) * exp(-kappa) / I[dim/2 -1](kappa) * exp(-kappa)
  6159. # = ive(dim/2, kappa) / ive(dim/2 -1, kappa)
  6160. halfdim = 0.5 * dim
  6161. def solve_for_kappa(kappa):
  6162. bessel_vals = ive([halfdim, halfdim - 1], kappa)
  6163. return bessel_vals[0]/bessel_vals[1] - r
  6164. root_res = root_scalar(solve_for_kappa, method="brentq",
  6165. bracket=(1e-8, 1e9))
  6166. kappa = root_res.root
  6167. return mu, kappa
# Module-level instance of the generic distribution, created with the default
# seed; the class docstring examples use it as `vonmises_fisher`.
vonmises_fisher = vonmises_fisher_gen()
  6169. class vonmises_fisher_frozen(multi_rv_frozen):
  6170. def __init__(self, mu=None, kappa=1, seed=None):
  6171. """Create a frozen von Mises-Fisher distribution.
  6172. Parameters
  6173. ----------
  6174. mu : array_like, default: None
  6175. Mean direction of the distribution.
  6176. kappa : float, default: 1
  6177. Concentration parameter. Must be positive.
  6178. seed : {None, int, `numpy.random.Generator`,
  6179. `numpy.random.RandomState`}, optional
  6180. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  6181. singleton is used.
  6182. If `seed` is an int, a new ``RandomState`` instance is used,
  6183. seeded with `seed`.
  6184. If `seed` is already a ``Generator`` or ``RandomState`` instance
  6185. then that instance is used.
  6186. """
  6187. self._dist = vonmises_fisher_gen(seed)
  6188. self.dim, self.mu, self.kappa = (
  6189. self._dist._process_parameters(mu, kappa)
  6190. )
  6191. def logpdf(self, x):
  6192. """
  6193. Parameters
  6194. ----------
  6195. x : array_like
  6196. Points at which to evaluate the log of the probability
  6197. density function. The last axis of `x` must correspond
  6198. to unit vectors of the same dimensionality as the distribution.
  6199. Returns
  6200. -------
  6201. logpdf : ndarray or scalar
  6202. Log of probability density function evaluated at `x`.
  6203. """
  6204. return self._dist._logpdf(x, self.dim, self.mu, self.kappa)
  6205. def pdf(self, x):
  6206. """
  6207. Parameters
  6208. ----------
  6209. x : array_like
  6210. Points at which to evaluate the log of the probability
  6211. density function. The last axis of `x` must correspond
  6212. to unit vectors of the same dimensionality as the distribution.
  6213. Returns
  6214. -------
  6215. pdf : ndarray or scalar
  6216. Probability density function evaluated at `x`.
  6217. """
  6218. return np.exp(self.logpdf(x))
  6219. def rvs(self, size=1, random_state=None):
  6220. """Draw random variates from the Von Mises-Fisher distribution.
  6221. Parameters
  6222. ----------
  6223. size : int or tuple of ints, optional
  6224. Given a shape of, for example, (m,n,k), m*n*k samples are
  6225. generated, and packed in an m-by-n-by-k arrangement.
  6226. Because each sample is N-dimensional, the output shape
  6227. is (m,n,k,N). If no shape is specified, a single (N-D)
  6228. sample is returned.
  6229. random_state : {None, int, `numpy.random.Generator`,
  6230. `numpy.random.RandomState`}, optional
  6231. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  6232. singleton is used.
  6233. If `seed` is an int, a new ``RandomState`` instance is used,
  6234. seeded with `seed`.
  6235. If `seed` is already a ``Generator`` or ``RandomState`` instance
  6236. then that instance is used.
  6237. Returns
  6238. -------
  6239. rvs : ndarray or scalar
  6240. Random variates of size (`size`, `N`), where `N` is the
  6241. dimension of the distribution.
  6242. """
  6243. random_state = self._dist._get_random_state(random_state)
  6244. return self._dist._rvs(self.dim, self.mu, self.kappa, size,
  6245. random_state)
  6246. def entropy(self):
  6247. """
  6248. Calculate the differential entropy of the von Mises-Fisher
  6249. distribution.
  6250. Returns
  6251. -------
  6252. h: float
  6253. Entropy of the Von Mises-Fisher distribution.
  6254. """
  6255. return self._dist._entropy(self.dim, self.kappa)
  6256. class normal_inverse_gamma_gen(multi_rv_generic):
  6257. r"""Normal-inverse-gamma distribution.
  6258. The normal-inverse-gamma distribution is the conjugate prior of a normal
  6259. distribution with unknown mean and variance.
  6260. Methods
  6261. -------
  6262. pdf(x, s2, mu=0, lmbda=1, a=1, b=1)
  6263. Probability density function.
  6264. logpdf(x, s2, mu=0, lmbda=1, a=1, b=1)
  6265. Log of the probability density function.
  6266. mean(mu=0, lmbda=1, a=1, b=1)
  6267. Distribution mean.
  6268. var(mu=0, lmbda=1, a=1, b=1)
  6269. Distribution variance.
  6270. rvs(mu=0, lmbda=1, a=1, b=1, size=None, random_state=None)
  6271. Draw random samples.
  6272. Parameters
  6273. ----------
  6274. mu, lmbda, a, b : array_like
  6275. Shape parameters of the distribution. See notes.
  6276. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  6277. Used for drawing random variates.
  6278. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  6279. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  6280. with seed.
  6281. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  6282. then that object is used.
  6283. Default is `None`.
  6284. See Also
  6285. --------
  6286. norm
  6287. invgamma
  6288. Notes
  6289. -----
  6290. The probability density function of `normal_inverse_gamma` is:
  6291. .. math::
  6292. f(x, \sigma^2; \mu, \lambda, \alpha, \beta) =
  6293. \frac{\sqrt{\lambda}}{\sqrt{2 \pi \sigma^2}}
  6294. \frac{\beta^\alpha}{\Gamma(\alpha)}
  6295. \left( \frac{1}{\sigma^2} \right)^{\alpha + 1}
  6296. \exp \left(- \frac{2 \beta + \lambda (x - \mu)^2} {2 \sigma^2} \right)
  6297. where all parameters are real and finite, and :math:`\sigma^2 > 0`,
  6298. :math:`\lambda > 0`, :math:`\alpha > 0`, and :math:`\beta > 0`.
  6299. Methods ``normal_inverse_gamma.pdf`` and ``normal_inverse_gamma.logpdf``
  6300. accept `x` and `s2` for arguments :math:`x` and :math:`\sigma^2`.
  6301. All methods accept `mu`, `lmbda`, `a`, and `b` for shape parameters
  6302. :math:`\mu`, :math:`\lambda`, :math:`\alpha`, and :math:`\beta`,
  6303. respectively.
  6304. .. versionadded:: 1.15
  6305. References
  6306. ----------
  6307. .. [1] Normal-inverse-gamma distribution, Wikipedia,
  6308. https://en.wikipedia.org/wiki/Normal-inverse-gamma_distribution
  6309. Examples
  6310. --------
  6311. Suppose we wish to investigate the relationship between the
  6312. normal-inverse-gamma distribution and the inverse gamma distribution.
  6313. >>> import numpy as np
  6314. >>> from scipy import stats
  6315. >>> import matplotlib.pyplot as plt
  6316. >>> rng = np.random.default_rng(527484872345)
  6317. >>> mu, lmbda, a, b = 0, 1, 20, 20
  6318. >>> norm_inv_gamma = stats.normal_inverse_gamma(mu, lmbda, a, b)
  6319. >>> inv_gamma = stats.invgamma(a, scale=b)
  6320. One approach is to compare the distribution of the `s2` elements of
  6321. random variates against the PDF of an inverse gamma distribution.
  6322. >>> _, s2 = norm_inv_gamma.rvs(size=10000, random_state=rng)
  6323. >>> bins = np.linspace(s2.min(), s2.max(), 50)
  6324. >>> plt.hist(s2, bins=bins, density=True, label='Frequency density')
  6325. >>> s2 = np.linspace(s2.min(), s2.max(), 300)
  6326. >>> plt.plot(s2, inv_gamma.pdf(s2), label='PDF')
  6327. >>> plt.xlabel(r'$\sigma^2$')
  6328. >>> plt.ylabel('Frequency density / PMF')
  6329. >>> plt.show()
  6330. Similarly, we can compare the marginal distribution of `s2` against
  6331. an inverse gamma distribution.
  6332. >>> from scipy.integrate import quad_vec
  6333. >>> from scipy import integrate
  6334. >>> s2 = np.linspace(0.5, 3, 6)
  6335. >>> res = quad_vec(lambda x: norm_inv_gamma.pdf(x, s2), -np.inf, np.inf)[0]
  6336. >>> np.allclose(res, inv_gamma.pdf(s2))
  6337. True
  6338. The sample mean is comparable to the mean of the distribution.
  6339. >>> x, s2 = norm_inv_gamma.rvs(size=10000, random_state=rng)
  6340. >>> x.mean(), s2.mean()
  6341. (np.float64(-0.005254750127304425), np.float64(1.050438111436508))
  6342. >>> norm_inv_gamma.mean()
  6343. (np.float64(0.0), np.float64(1.0526315789473684))
  6344. Similarly, for the variance:
  6345. >>> x.var(ddof=1), s2.var(ddof=1)
  6346. (np.float64(1.0546150578185023), np.float64(0.061829865266330754))
  6347. >>> norm_inv_gamma.var()
  6348. (np.float64(1.0526315789473684), np.float64(0.061557402277623886))
  6349. """
  6350. def rvs(self, mu=0, lmbda=1, a=1, b=1, size=None, random_state=None):
  6351. """Draw random samples from the distribution.
  6352. Parameters
  6353. ----------
  6354. mu, lmbda, a, b : array_like, optional
  6355. Shape parameters. `lmbda`, `a`, and `b` must be greater
  6356. than zero.
  6357. size : int or tuple of ints, optional
  6358. Shape of samples to draw.
  6359. random_state : {None, int, np.random.RandomState, np.random.Generator}, optional
  6360. Used for drawing random variates.
  6361. If `random_state` is `None`, the `~np.random.RandomState` singleton is used.
  6362. If `random_state` is an int, a new ``RandomState`` instance is used, seeded
  6363. with `random_state`.
  6364. If `random_state` is already a ``RandomState`` or ``Generator`` instance,
  6365. then that object is used.
  6366. Default is `None`.
  6367. Returns
  6368. -------
  6369. x, s2 : ndarray
  6370. Random variates.
  6371. """
  6372. random_state = self._get_random_state(random_state)
  6373. s2 = invgamma(a, scale=b).rvs(size=size, random_state=random_state)
  6374. scale = (s2 / lmbda)**0.5
  6375. x = norm(loc=mu, scale=scale).rvs(size=size, random_state=random_state)
  6376. dtype = np.result_type(1.0, mu, lmbda, a, b)
  6377. return x.astype(dtype), s2.astype(dtype)
  6378. def _logpdf(self, x, s2, mu, lmbda, a, b):
  6379. t1 = 0.5 * (np.log(lmbda) - np.log(2 * np.pi * s2))
  6380. t2 = a*np.log(b) - special.gammaln(a).astype(a.dtype)
  6381. t3 = -(a + 1) * np.log(s2)
  6382. t4 = -(2*b + lmbda*(x - mu)**2) / (2*s2)
  6383. return t1 + t2 + t3 + t4
  6384. def logpdf(self, x, s2, mu=0, lmbda=1, a=1, b=1):
  6385. """Log of the probability density function.
  6386. Parameters
  6387. ----------
  6388. x, s2 : array_like
  6389. Arguments. `s2` must be greater than zero.
  6390. mu, lmbda, a, b : array_like, optional
  6391. Shape parameters. `lmbda`, `a`, and `b` must be greater
  6392. than zero.
  6393. Returns
  6394. -------
  6395. logpdf : ndarray or scalar
  6396. Log of the probability density function.
  6397. """
  6398. invalid, args = self._process_parameters_pdf(x, s2, mu, lmbda, a, b)
  6399. s2 = args[1]
  6400. # Keep it simple for now; lazyselect later, perhaps.
  6401. with np.errstate(all='ignore'):
  6402. logpdf = np.asarray(self._logpdf(*args))
  6403. logpdf[s2 <= 0] = -np.inf
  6404. logpdf[invalid] = np.nan
  6405. return logpdf[()]
  6406. def _pdf(self, x, s2, mu, lmbda, a, b):
  6407. t1 = np.sqrt(lmbda / (2 * np.pi * s2))
  6408. t2 = b**a / special.gamma(a).astype(a.dtype)
  6409. t3 = (1 / s2)**(a + 1)
  6410. t4 = np.exp(-(2*b + lmbda*(x - mu)**2) / (2*s2))
  6411. return t1 * t2 * t3 * t4
  6412. def pdf(self, x, s2, mu=0, lmbda=1, a=1, b=1):
  6413. """The probability density function.
  6414. Parameters
  6415. ----------
  6416. x, s2 : array_like
  6417. Arguments. `s2` must be greater than zero.
  6418. mu, lmbda, a, b : array_like, optional
  6419. Shape parameters. `lmbda`, `a`, and `b` must be greater
  6420. than zero.
  6421. Returns
  6422. -------
  6423. logpdf : ndarray or scalar
  6424. The probability density function.
  6425. """
  6426. invalid, args = self._process_parameters_pdf(x, s2, mu, lmbda, a, b)
  6427. s2 = args[1]
  6428. # Keep it simple for now; lazyselect later, perhaps.
  6429. with np.errstate(all='ignore'):
  6430. pdf = np.asarray(self._pdf(*args))
  6431. pdf[s2 <= 0] = 0
  6432. pdf[invalid] = np.nan
  6433. return pdf[()]
  6434. def mean(self, mu=0, lmbda=1, a=1, b=1):
  6435. """The mean of the distribution.
  6436. Parameters
  6437. ----------
  6438. mu, lmbda, a, b : array_like, optional
  6439. Shape parameters. `lmbda` and `b` must be greater
  6440. than zero, and `a` must be greater than one.
  6441. Returns
  6442. -------
  6443. x, s2 : ndarray
  6444. The mean of the distribution.
  6445. """
  6446. invalid, args = self._process_shapes(mu, lmbda, a, b)
  6447. mu, lmbda, a, b = args
  6448. invalid |= ~(a > 1)
  6449. mean_x = np.asarray(mu).copy()
  6450. mean_s2 = np.asarray(b / (a - 1))
  6451. mean_x[invalid] = np.nan
  6452. mean_s2[invalid] = np.nan
  6453. return mean_x[()], mean_s2[()]
  6454. def var(self, mu=0, lmbda=1, a=1, b=1):
  6455. """The variance of the distribution.
  6456. Parameters
  6457. ----------
  6458. mu, lmbda, a, b : array_like, optional
  6459. Shape parameters. `lmbda` and `b` must be greater
  6460. than zero, and `a` must be greater than two.
  6461. Returns
  6462. -------
  6463. x, s2 : ndarray
  6464. The variance of the distribution.
  6465. """
  6466. invalid, args = self._process_shapes(mu, lmbda, a, b)
  6467. mu, lmbda, a, b = args
  6468. invalid_x = invalid | ~(a > 1)
  6469. invalid_s2 = invalid | ~(a > 2)
  6470. var_x = b / ((a - 1) * lmbda)
  6471. var_s2 = b**2 / ((a - 1)**2 * (a - 2))
  6472. var_x, var_s2 = np.asarray(var_x), np.asarray(var_s2)
  6473. var_x[invalid_x] = np.nan
  6474. var_s2[invalid_s2] = np.nan
  6475. return var_x[()], var_s2[()]
  6476. def _process_parameters_pdf(self, x, s2, mu, lmbda, a, b):
  6477. args = np.broadcast_arrays(x, s2, mu, lmbda, a, b)
  6478. dtype = np.result_type(1.0, *(arg.dtype for arg in args))
  6479. args = [arg.astype(dtype, copy=False) for arg in args]
  6480. x, s2, mu, lmbda, a, b = args
  6481. invalid = ~((lmbda > 0) & (a > 0) & (b > 0))
  6482. return invalid, args
  6483. def _process_shapes(self, mu, lmbda, a, b):
  6484. args = np.broadcast_arrays(mu, lmbda, a, b)
  6485. dtype = np.result_type(1.0, *(arg.dtype for arg in args))
  6486. args = [arg.astype(dtype, copy=False) for arg in args]
  6487. mu, lmbda, a, b = args
  6488. invalid = ~((lmbda > 0) & (a > 0) & (b > 0))
  6489. return invalid, args
  6490. def __call__(self, mu=0, lmbda=1, a=1, b=1, seed=None):
  6491. return normal_inverse_gamma_frozen(mu, lmbda, a, b, seed=seed)
# Module-level instance of the normal-inverse-gamma generator class.
normal_inverse_gamma = normal_inverse_gamma_gen()
  6493. class normal_inverse_gamma_frozen(multi_rv_frozen):
  6494. def __init__(self, mu=0, lmbda=1, a=1, b=1, seed=None):
  6495. self._dist = normal_inverse_gamma_gen(seed)
  6496. self._shapes = mu, lmbda, a, b
  6497. def logpdf(self, x, s2):
  6498. return self._dist.logpdf(x, s2, *self._shapes)
  6499. def pdf(self, x, s2):
  6500. return self._dist.pdf(x, s2, *self._shapes)
  6501. def mean(self):
  6502. return self._dist.mean(*self._shapes)
  6503. def var(self):
  6504. return self._dist.var(*self._shapes)
  6505. def rvs(self, size=None, random_state=None):
  6506. return self._dist.rvs(*self._shapes, size=size, random_state=random_state)
# Propagate method docstrings from normal_inverse_gamma_gen to
# normal_inverse_gamma_frozen. For each listed method, the frozen method
# receives the generator method's docstring formatted with
# `mvn_docdict_noparams`; afterwards the generator method's own docstring
# is re-formatted with `mvn_docdict_params`. The frozen docstring must be
# produced first because the last statement overwrites `method.__doc__`.
for name in ['logpdf', 'pdf', 'mean', 'var', 'rvs']:
    method = normal_inverse_gamma_gen.__dict__[name]
    method_frozen = normal_inverse_gamma_frozen.__dict__[name]
    method_frozen.__doc__ = doccer.docformat(method.__doc__,
                                             mvn_docdict_noparams)
    method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)