_continuous_distns.py 400 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
2222122231222412225122261222712228122291223012231122321223312234122351223612237122381223912240122411224212243122441224512246122471224812249122501225112252122531225412255122561225712258122591226012261122621226312264122651226612267122681226912270122711227212273122741227512276122771227812279122801228112282122831228412285122861228712288122891229012291122921229312294122951229612297122981229912300123011230212303123041230512306123071230812309123101231112312123131231412315123161231712318123191232012321123221232312324123251232612327123281232912330123311233212333123341233512336123371233812339123401234112342123431234412345123461234712348123491235012351123521235312354123551235612357123581235912360123611236212363123641236512366123671236812369123701237112372123731237412375123761237712378123791238012381123821238312384123851238612387123881238912390123911239212393123941239512396123971239812399124001240112402124031240412405124061240712408124091241012411124121241312414124151241612417124181241912420124211242212423124241242512426124271242812429124301243112432124331243412435124361243712438124391244012441124421244312444124451244612447124481244912450124511245212453124541245512456124571245812459124601246112462124631246412465124661246712468124691247012471124721247312474124751247612477124781247912480124811248212483124841248512486124871248812489124901249112492124931249412495124961249712498124991250012501125021250312504125051250612507125081250912510125111251212513125141251512516125171251812519125201252112522125231252412525125261252712528125291253012531125321253312534125351253612537125381253912540125411254212543
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. import warnings
  6. from collections.abc import Iterable
  7. from functools import wraps, cached_property
  8. import ctypes
  9. import operator
  10. import numpy as np
  11. from numpy.polynomial import Polynomial
  12. from scipy.interpolate import BSpline
  13. from scipy._lib.doccer import (extend_notes_in_docstring,
  14. replace_notes_in_docstring,
  15. inherit_docstring_from)
  16. from scipy._lib._ccallback import LowLevelCallable
  17. from scipy import optimize
  18. from scipy import integrate
  19. import scipy.special as sc
  20. import scipy.special._ufuncs as scu
  21. from scipy._lib._util import _lazyselect
  22. import scipy._lib.array_api_extra as xpx
  23. from scipy._lib._array_api import xp_promote
  24. from . import _stats
  25. from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
  26. tukeylambda_kurtosis as _tlkurt)
  27. from ._distn_infrastructure import (_vectorize_rvs_over_shapes,
  28. get_distribution_names, _kurtosis, _isintegral,
  29. rv_continuous, _skew, _get_fixed_fit_value, _check_shape, _ShapeInfo)
  30. from ._ksstats import kolmogn, kolmognp, kolmogni
  31. from ._constants import (_XMIN, _LOGXMIN, _EULER, _ZETA3, _SQRT_PI,
  32. _SQRT_2_OVER_PI, _LOG_PI, _LOG_SQRT_2_OVER_PI)
  33. from ._censored_data import CensoredData
  34. from scipy.optimize import root_scalar
  35. from scipy.stats._warnings_errors import FitError
  36. import scipy.stats as stats
  37. def _remove_optimizer_parameters(kwds):
  38. """
  39. Remove the optimizer-related keyword arguments 'loc', 'scale' and
  40. 'optimizer' from `kwds`. Then check that `kwds` is empty, and
  41. raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.
  42. This function is used in the fit method of distributions that override
  43. the default method and do not use the default optimization code.
  44. `kwds` is modified in-place.
  45. """
  46. kwds.pop('loc', None)
  47. kwds.pop('scale', None)
  48. kwds.pop('optimizer', None)
  49. kwds.pop('method', None)
  50. if kwds:
  51. raise TypeError(f"Unknown arguments: {kwds}.")
  52. def _call_super_mom(fun):
  53. # If fit method is overridden only for MLE and doesn't specify what to do
  54. # if method == 'mm' or with censored data, this decorator calls the generic
  55. # implementation.
  56. @wraps(fun)
  57. def wrapper(self, data, *args, **kwds):
  58. method = kwds.get('method', 'mle').lower()
  59. censored = isinstance(data, CensoredData)
  60. if method == 'mm' or (censored and data.num_censored() > 0):
  61. return super(type(self), self).fit(data, *args, **kwds)
  62. else:
  63. if censored:
  64. # data is an instance of CensoredData, but actually holds
  65. # no censored values, so replace it with the array of
  66. # uncensored values.
  67. data = data._uncensored
  68. return fun(self, data, *args, **kwds)
  69. return wrapper
  70. def _get_left_bracket(fun, rbrack, lbrack=None):
  71. # find left bracket for `root_scalar`. A guess for lbrack may be provided.
  72. lbrack = lbrack or rbrack - 1
  73. diff = rbrack - lbrack
  74. # if there is no sign change in `fun` between the brackets, expand
  75. # rbrack - lbrack until a sign change occurs
  76. def interval_contains_root(lbrack, rbrack):
  77. # return true if the signs disagree.
  78. return np.sign(fun(lbrack)) != np.sign(fun(rbrack))
  79. while not interval_contains_root(lbrack, rbrack):
  80. diff *= 2
  81. lbrack = rbrack - diff
  82. msg = ("The solver could not find a bracket containing a "
  83. "root to an MLE first order condition.")
  84. if np.isinf(lbrack):
  85. raise FitSolverError(msg)
  86. return lbrack
  87. class ksone_gen(rv_continuous):
  88. r"""Kolmogorov-Smirnov one-sided test statistic distribution.
  89. This is the distribution of the one-sided Kolmogorov-Smirnov (KS)
  90. statistics :math:`D_n^+` and :math:`D_n^-`
  91. for a finite sample size ``n >= 1`` (the shape parameter).
  92. %(before_notes)s
  93. See Also
  94. --------
  95. kstwobign, kstwo, kstest
  96. Notes
  97. -----
  98. :math:`D_n^+` and :math:`D_n^-` are given by
  99. .. math::
  100. D_n^+ &= \text{sup}_x (F_n(x) - F(x)),\\
  101. D_n^- &= \text{sup}_x (F(x) - F_n(x)),\\
  102. where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
  103. `ksone` describes the distribution under the null hypothesis of the KS test
  104. that the empirical CDF corresponds to :math:`n` i.i.d. random variates
  105. with CDF :math:`F`.
  106. %(after_notes)s
  107. References
  108. ----------
  109. .. [1] Birnbaum, Z. W. and Tingey, F.H. "One-sided confidence contours
  110. for probability distribution functions", The Annals of Mathematical
  111. Statistics, 22(4), pp 592-596 (1951).
  112. Examples
  113. --------
  114. >>> import numpy as np
  115. >>> from scipy.stats import ksone
  116. >>> import matplotlib.pyplot as plt
  117. >>> fig, ax = plt.subplots(1, 1)
  118. Display the probability density function (``pdf``):
  119. >>> n = 1e+03
  120. >>> x = np.linspace(ksone.ppf(0.01, n),
  121. ... ksone.ppf(0.99, n), 100)
  122. >>> ax.plot(x, ksone.pdf(x, n),
  123. ... 'r-', lw=5, alpha=0.6, label='ksone pdf')
  124. Alternatively, the distribution object can be called (as a function)
  125. to fix the shape, location and scale parameters. This returns a "frozen"
  126. RV object holding the given parameters fixed.
  127. Freeze the distribution and display the frozen ``pdf``:
  128. >>> rv = ksone(n)
  129. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  130. >>> ax.legend(loc='best', frameon=False)
  131. >>> plt.show()
  132. Check accuracy of ``cdf`` and ``ppf``:
  133. >>> vals = ksone.ppf([0.001, 0.5, 0.999], n)
  134. >>> np.allclose([0.001, 0.5, 0.999], ksone.cdf(vals, n))
  135. True
  136. """
  137. def _argcheck(self, n):
  138. return (n >= 1) & (n == np.round(n))
  139. def _shape_info(self):
  140. return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
  141. def _pdf(self, x, n):
  142. return -scu._smirnovp(n, x)
  143. def _cdf(self, x, n):
  144. return scu._smirnovc(n, x)
  145. def _sf(self, x, n):
  146. return sc.smirnov(n, x)
  147. def _ppf(self, q, n):
  148. return scu._smirnovci(n, q)
  149. def _isf(self, q, n):
  150. return sc.smirnovi(n, q)
  151. ksone = ksone_gen(a=0.0, b=1.0, name='ksone')
  152. class kstwo_gen(rv_continuous):
  153. r"""Kolmogorov-Smirnov two-sided test statistic distribution.
  154. This is the distribution of the two-sided Kolmogorov-Smirnov (KS)
  155. statistic :math:`D_n` for a finite sample size ``n >= 1``
  156. (the shape parameter).
  157. %(before_notes)s
  158. See Also
  159. --------
  160. kstwobign, ksone, kstest
  161. Notes
  162. -----
  163. :math:`D_n` is given by
  164. .. math::
  165. D_n = \text{sup}_x |F_n(x) - F(x)|
  166. where :math:`F` is a (continuous) CDF and :math:`F_n` is an empirical CDF.
  167. `kstwo` describes the distribution under the null hypothesis of the KS test
  168. that the empirical CDF corresponds to :math:`n` i.i.d. random variates
  169. with CDF :math:`F`.
  170. %(after_notes)s
  171. References
  172. ----------
  173. .. [1] Simard, R., L'Ecuyer, P. "Computing the Two-Sided
  174. Kolmogorov-Smirnov Distribution", Journal of Statistical Software,
  175. Vol 39, 11, 1-18 (2011).
  176. Examples
  177. --------
  178. >>> import numpy as np
  179. >>> from scipy.stats import kstwo
  180. >>> import matplotlib.pyplot as plt
  181. >>> fig, ax = plt.subplots(1, 1)
  182. Display the probability density function (``pdf``):
  183. >>> n = 10
  184. >>> x = np.linspace(kstwo.ppf(0.01, n),
  185. ... kstwo.ppf(0.99, n), 100)
  186. >>> ax.plot(x, kstwo.pdf(x, n),
  187. ... 'r-', lw=5, alpha=0.6, label='kstwo pdf')
  188. Alternatively, the distribution object can be called (as a function)
  189. to fix the shape, location and scale parameters. This returns a "frozen"
  190. RV object holding the given parameters fixed.
  191. Freeze the distribution and display the frozen ``pdf``:
  192. >>> rv = kstwo(n)
  193. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  194. >>> ax.legend(loc='best', frameon=False)
  195. >>> plt.show()
  196. Check accuracy of ``cdf`` and ``ppf``:
  197. >>> vals = kstwo.ppf([0.001, 0.5, 0.999], n)
  198. >>> np.allclose([0.001, 0.5, 0.999], kstwo.cdf(vals, n))
  199. True
  200. """
  201. def _argcheck(self, n):
  202. return (n >= 1) & (n == np.round(n))
  203. def _shape_info(self):
  204. return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
  205. def _get_support(self, n):
  206. return (0.5/(n if not isinstance(n, Iterable) else np.asanyarray(n)),
  207. 1.0)
  208. def _pdf(self, x, n):
  209. return kolmognp(n, x)
  210. def _cdf(self, x, n):
  211. return kolmogn(n, x)
  212. def _sf(self, x, n):
  213. return kolmogn(n, x, cdf=False)
  214. def _ppf(self, q, n):
  215. return kolmogni(n, q, cdf=True)
  216. def _isf(self, q, n):
  217. return kolmogni(n, q, cdf=False)
  218. # Use the pdf, (not the ppf) to compute moments
  219. kstwo = kstwo_gen(momtype=0, a=0.0, b=1.0, name='kstwo')
  220. class kstwobign_gen(rv_continuous):
  221. r"""Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
  222. This is the asymptotic distribution of the two-sided Kolmogorov-Smirnov
  223. statistic :math:`\sqrt{n} D_n` that measures the maximum absolute
  224. distance of the theoretical (continuous) CDF from the empirical CDF.
  225. (see `kstest`).
  226. %(before_notes)s
  227. See Also
  228. --------
  229. ksone, kstwo, kstest
  230. Notes
  231. -----
  232. :math:`\sqrt{n} D_n` is given by
  233. .. math::
  234. D_n = \text{sup}_x |F_n(x) - F(x)|
  235. where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
  236. `kstwobign` describes the asymptotic distribution (i.e. the limit of
  237. :math:`\sqrt{n} D_n`) under the null hypothesis of the KS test that the
  238. empirical CDF corresponds to i.i.d. random variates with CDF :math:`F`.
  239. %(after_notes)s
  240. References
  241. ----------
  242. .. [1] Feller, W. "On the Kolmogorov-Smirnov Limit Theorems for Empirical
  243. Distributions", Ann. Math. Statist. Vol 19, 177-189 (1948).
  244. %(example)s
  245. """
  246. def _shape_info(self):
  247. return []
  248. def _pdf(self, x):
  249. return -scu._kolmogp(x)
  250. def _cdf(self, x):
  251. return scu._kolmogc(x)
  252. def _sf(self, x):
  253. return sc.kolmogorov(x)
  254. def _ppf(self, q):
  255. return scu._kolmogci(q)
  256. def _isf(self, q):
  257. return sc.kolmogi(q)
  258. kstwobign = kstwobign_gen(a=0.0, name='kstwobign')
  259. ## Normal distribution
  260. # loc = mu, scale = std
  261. # Keep these implementations out of the class definition so they can be reused
  262. # by other distributions.
  263. _norm_pdf_C = np.sqrt(2*np.pi)
  264. _norm_pdf_logC = np.log(_norm_pdf_C)
  265. def _norm_pdf(x):
  266. return np.exp(-x**2/2.0) / _norm_pdf_C
  267. def _norm_logpdf(x):
  268. return -x**2 / 2.0 - _norm_pdf_logC
  269. def _norm_cdf(x):
  270. return sc.ndtr(x)
  271. def _norm_logcdf(x):
  272. return sc.log_ndtr(x)
  273. def _norm_ppf(q):
  274. return sc.ndtri(q)
  275. def _norm_sf(x):
  276. return _norm_cdf(-x)
  277. def _norm_logsf(x):
  278. return _norm_logcdf(-x)
  279. def _norm_isf(q):
  280. return -_norm_ppf(q)
  281. class norm_gen(rv_continuous):
  282. r"""A normal continuous random variable.
  283. The location (``loc``) keyword specifies the mean.
  284. The scale (``scale``) keyword specifies the standard deviation.
  285. %(before_notes)s
  286. Notes
  287. -----
  288. The probability density function for `norm` is:
  289. .. math::
  290. f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}
  291. for a real number :math:`x`.
  292. %(after_notes)s
  293. %(example)s
  294. """
  295. def _shape_info(self):
  296. return []
  297. def _rvs(self, size=None, random_state=None):
  298. return random_state.standard_normal(size)
  299. def _pdf(self, x):
  300. # norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
  301. return _norm_pdf(x)
  302. def _logpdf(self, x):
  303. return _norm_logpdf(x)
  304. def _cdf(self, x):
  305. return _norm_cdf(x)
  306. def _logcdf(self, x):
  307. return _norm_logcdf(x)
  308. def _sf(self, x):
  309. return _norm_sf(x)
  310. def _logsf(self, x):
  311. return _norm_logsf(x)
  312. def _ppf(self, q):
  313. return _norm_ppf(q)
  314. def _isf(self, q):
  315. return _norm_isf(q)
  316. def _stats(self):
  317. return 0.0, 1.0, 0.0, 0.0
  318. def _entropy(self):
  319. return 0.5*(np.log(2*np.pi)+1)
  320. @_call_super_mom
  321. @replace_notes_in_docstring(rv_continuous, notes="""\
  322. For the normal distribution, method of moments and maximum likelihood
  323. estimation give identical fits, and explicit formulas for the estimates
  324. are available.
  325. This function uses these explicit formulas for the maximum likelihood
  326. estimation of the normal distribution parameters, so the
  327. `optimizer` and `method` arguments are ignored.\n\n""")
  328. def fit(self, data, **kwds):
  329. floc = kwds.pop('floc', None)
  330. fscale = kwds.pop('fscale', None)
  331. _remove_optimizer_parameters(kwds)
  332. if floc is not None and fscale is not None:
  333. # This check is for consistency with `rv_continuous.fit`.
  334. # Without this check, this function would just return the
  335. # parameters that were given.
  336. raise ValueError("All parameters fixed. There is nothing to "
  337. "optimize.")
  338. data = np.asarray(data)
  339. if not np.isfinite(data).all():
  340. raise ValueError("The data contains non-finite values.")
  341. if floc is None:
  342. loc = data.mean()
  343. else:
  344. loc = floc
  345. if fscale is None:
  346. scale = np.sqrt(((data - loc)**2).mean())
  347. else:
  348. scale = fscale
  349. return loc, scale
  350. def _munp(self, n):
  351. """
  352. @returns Moments of standard normal distribution for integer n >= 0
  353. See eq. 16 of https://arxiv.org/abs/1209.4340v2
  354. """
  355. if n == 0:
  356. return 1.
  357. if n % 2 == 0:
  358. return sc.factorial2(int(n) - 1)
  359. else:
  360. return 0.
  361. norm = norm_gen(name='norm')
  362. class alpha_gen(rv_continuous):
  363. r"""An alpha continuous random variable.
  364. %(before_notes)s
  365. Notes
  366. -----
  367. The probability density function for `alpha` ([1]_, [2]_) is:
  368. .. math::
  369. f(x, a) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} *
  370. \exp(-\frac{1}{2} (a-1/x)^2)
  371. where :math:`\Phi` is the normal CDF, :math:`x > 0`, and :math:`a > 0`.
  372. `alpha` takes ``a`` as a shape parameter.
  373. %(after_notes)s
  374. References
  375. ----------
  376. .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
  377. Distributions, Volume 1", Second Edition, John Wiley and Sons,
  378. p. 173 (1994).
  379. .. [2] Anthony A. Salvia, "Reliability applications of the Alpha
  380. Distribution", IEEE Transactions on Reliability, Vol. R-34,
  381. No. 3, pp. 251-252 (1985).
  382. %(example)s
  383. """
  384. _support_mask = rv_continuous._open_support_mask
  385. def _shape_info(self):
  386. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  387. def _pdf(self, x, a):
  388. # alpha.pdf(x, a) = 1/(x**2*Phi(a)*sqrt(2*pi)) * exp(-1/2 * (a-1/x)**2)
  389. return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x)
  390. def _logpdf(self, x, a):
  391. return -2*np.log(x) + _norm_logpdf(a-1.0/x) - np.log(_norm_cdf(a))
  392. def _cdf(self, x, a):
  393. return _norm_cdf(a-1.0/x) / _norm_cdf(a)
  394. def _ppf(self, q, a):
  395. return 1.0/np.asarray(a - _norm_ppf(q*_norm_cdf(a)))
  396. def _stats(self, a):
  397. return [np.inf]*2 + [np.nan]*2
  398. alpha = alpha_gen(a=0.0, name='alpha')
  399. class anglit_gen(rv_continuous):
  400. r"""An anglit continuous random variable.
  401. %(before_notes)s
  402. Notes
  403. -----
  404. The probability density function for `anglit` is:
  405. .. math::
  406. f(x) = \sin(2x + \pi/2) = \cos(2x)
  407. for :math:`-\pi/4 \le x \le \pi/4`.
  408. %(after_notes)s
  409. %(example)s
  410. """
  411. def _shape_info(self):
  412. return []
  413. def _pdf(self, x):
  414. # anglit.pdf(x) = sin(2*x + \pi/2) = cos(2*x)
  415. return np.cos(2*x)
  416. def _cdf(self, x):
  417. return np.sin(x+np.pi/4)**2.0
  418. def _sf(self, x):
  419. return np.cos(x + np.pi / 4) ** 2.0
  420. def _ppf(self, q):
  421. return np.arcsin(np.sqrt(q))-np.pi/4
  422. def _stats(self):
  423. return 0.0, np.pi*np.pi/16-0.5, 0.0, -2*(np.pi**4 - 96)/(np.pi*np.pi-8)**2
  424. def _entropy(self):
  425. return 1-np.log(2)
  426. anglit = anglit_gen(a=-np.pi/4, b=np.pi/4, name='anglit')
  427. class arcsine_gen(rv_continuous):
  428. r"""An arcsine continuous random variable.
  429. %(before_notes)s
  430. Notes
  431. -----
  432. The probability density function for `arcsine` is:
  433. .. math::
  434. f(x) = \frac{1}{\pi \sqrt{x (1-x)}}
  435. for :math:`0 < x < 1`.
  436. %(after_notes)s
  437. %(example)s
  438. """
  439. def _shape_info(self):
  440. return []
  441. def _pdf(self, x):
  442. # arcsine.pdf(x) = 1/(pi*sqrt(x*(1-x)))
  443. with np.errstate(divide='ignore'):
  444. return 1.0/np.pi/np.sqrt(x*(1-x))
  445. def _cdf(self, x):
  446. return 2.0/np.pi*np.arcsin(np.sqrt(x))
  447. def _ppf(self, q):
  448. return np.sin(np.pi/2.0*q)**2.0
  449. def _stats(self):
  450. mu = 0.5
  451. mu2 = 1.0/8
  452. g1 = 0
  453. g2 = -3.0/2.0
  454. return mu, mu2, g1, g2
  455. def _entropy(self):
  456. return -0.24156447527049044468
  457. arcsine = arcsine_gen(a=0.0, b=1.0, name='arcsine')
  458. class FitDataError(ValueError):
  459. """Raised when input data is inconsistent with fixed parameters."""
  460. # This exception is raised by, for example, beta_gen.fit when both floc
  461. # and fscale are fixed and there are values in the data not in the open
  462. # interval (floc, floc+fscale).
  463. def __init__(self, distr, lower, upper):
  464. self.args = (
  465. "Invalid values in `data`. Maximum likelihood "
  466. f"estimation with {distr!r} requires that {lower!r} < "
  467. f"(x - loc)/scale < {upper!r} for each x in `data`.",
  468. )
  469. class FitSolverError(FitError):
  470. """
  471. Raised when a solver fails to converge while fitting a distribution.
  472. """
  473. # This exception is raised by, for example, beta_gen.fit when
  474. # optimize.fsolve returns with ier != 1.
  475. def __init__(self, mesg):
  476. emsg = "Solver for the MLE equations failed to converge: "
  477. emsg += mesg.replace('\n', '')
  478. self.args = (emsg,)
  479. def _beta_mle_a(a, b, n, s1):
  480. # The zeros of this function give the MLE for `a`, with
  481. # `b`, `n` and `s1` given. `s1` is the sum of the logs of
  482. # the data. `n` is the number of data points.
  483. psiab = sc.psi(a + b)
  484. func = s1 - n * (-psiab + sc.psi(a))
  485. return func
  486. def _beta_mle_ab(theta, n, s1, s2):
  487. # Zeros of this function are critical points of
  488. # the maximum likelihood function. Solving this system
  489. # for theta (which contains a and b) gives the MLE for a and b
  490. # given `n`, `s1` and `s2`. `s1` is the sum of the logs of the data,
  491. # and `s2` is the sum of the logs of 1 - data. `n` is the number
  492. # of data points.
  493. a, b = theta
  494. psiab = sc.psi(a + b)
  495. func = [s1 - n * (-psiab + sc.psi(a)),
  496. s2 - n * (-psiab + sc.psi(b))]
  497. return func
class beta_gen(rv_continuous):
    r"""A beta continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `beta` is:

    .. math::

        f(x, a, b) = \frac{\Gamma(a+b) x^{a-1} (1-x)^{b-1}}
                          {\Gamma(a) \Gamma(b)}

    for :math:`0 <= x <= 1`, :math:`a > 0`, :math:`b > 0`, where
    :math:`\Gamma` is the gamma function (`scipy.special.gamma`).

    `beta` takes :math:`a` and :math:`b` as shape parameters.

    This distribution uses routines from the Boost Math C++ library for
    the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
    methods. [1]_

    Maximum likelihood estimates of parameters are only available when the location and
    scale are fixed. When either of these parameters is free, ``beta.fit`` resorts to
    numerical optimization, but this problem is unbounded: the location and scale may be
    chosen to make the minimum and maximum elements of the data coincide with the
    endpoints of the support, and the shape parameters may be chosen to make the PDF at
    these points infinite. For best results, pass ``floc`` and ``fscale`` keyword
    arguments to fix the location and scale, or use `scipy.stats.fit` with
    ``method='mse'``.

    %(after_notes)s

    References
    ----------
    .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.

    %(example)s

    """
    def _shape_info(self):
        # Both shape parameters live on the open interval (0, inf).
        ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [ia, ib]

    def _rvs(self, a, b, size=None, random_state=None):
        return random_state.beta(a, b, size)

    def _pdf(self, x, a, b):
        #                     gamma(a+b) * x**(a-1) * (1-x)**(b-1)
        # beta.pdf(x, a, b) = ------------------------------------
        #                              gamma(a)*gamma(b)
        # Overflow warnings from the underlying routine are suppressed.
        with np.errstate(over='ignore'):
            return scu._beta_pdf(x, a, b)

    def _logpdf(self, x, a, b):
        # (b-1)*log1p(-x) + (a-1)*log(x) - log(B(a, b)); xlog1py/xlogy
        # return 0 when the multiplier is 0.
        lPx = sc.xlog1py(b - 1.0, -x) + sc.xlogy(a - 1.0, x)
        lPx -= sc.betaln(a, b)
        return lPx

    def _cdf(self, x, a, b):
        return sc.betainc(a, b, x)

    def _sf(self, x, a, b):
        return sc.betaincc(a, b, x)

    def _isf(self, x, a, b):
        # Note: the first argument is a probability despite being named `x`.
        return sc.betainccinv(a, b, x)

    def _ppf(self, q, a, b):
        return scu._beta_ppf(q, a, b)

    def _stats(self, a, b):
        # Closed-form mean, variance, skewness and excess kurtosis.
        a_plus_b = a + b
        _beta_mean = a/a_plus_b
        _beta_variance = a*b / (a_plus_b**2 * (a_plus_b + 1))
        _beta_skewness = ((2 * (b - a) * np.sqrt(a_plus_b + 1)) /
                          ((a_plus_b + 2) * np.sqrt(a * b)))
        _beta_kurtosis_excess_n = 6 * ((a - b)**2 * (a_plus_b + 1) -
                                       a * b * (a_plus_b + 2))
        _beta_kurtosis_excess_d = a * b * (a_plus_b + 2) * (a_plus_b + 3)
        _beta_kurtosis_excess = _beta_kurtosis_excess_n / _beta_kurtosis_excess_d
        return (
            _beta_mean,
            _beta_variance,
            _beta_skewness,
            _beta_kurtosis_excess)

    def _fitstart(self, data):
        # Seed the shape parameters by solving for the (a, b) whose skewness
        # and excess kurtosis equal the sample values g1 and g2.
        if isinstance(data, CensoredData):
            data = data._uncensor()

        g1 = _skew(data)
        g2 = _kurtosis(data)

        def func(x):
            # Residuals between the beta skewness/kurtosis at (a, b) and the
            # sample statistics.
            a, b = x
            sk = 2*(b-a)*np.sqrt(a + b + 1) / (a + b + 2) / np.sqrt(a*b)
            ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
            ku /= a*b*(a+b+2)*(a+b+3)
            ku *= 6
            return [sk-g1, ku-g2]
        a, b = optimize.fsolve(func, (1.0, 1.0))
        return super()._fitstart(data, args=(a, b))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        In the special case where `method="MLE"` and
        both `floc` and `fscale` are given, a
        `ValueError` is raised if any value `x` in `data` does not satisfy
        `floc < x < floc + fscale`.\n\n""")
    def fit(self, data, *args, **kwds):
        # Override rv_continuous.fit, so we can more efficiently handle the
        # case where floc and fscale are given.

        floc = kwds.get('floc', None)
        fscale = kwds.get('fscale', None)

        if floc is None or fscale is None:
            # do general fit
            return super().fit(data, *args, **kwds)

        # We already got these from kwds, so just pop them.
        kwds.pop('floc', None)
        kwds.pop('fscale', None)

        # Each shape parameter may be fixed under any of three keyword names.
        f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
        f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])

        _remove_optimizer_parameters(kwds)

        if f0 is not None and f1 is not None:
            # This check is for consistency with `rv_continuous.fit`.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        # Special case: loc and scale are constrained, so we are fitting
        # just the shape parameters.  This can be done much more efficiently
        # than the method used in `rv_continuous.fit`.  (See the subsection
        # "Two unknown parameters" in the section "Maximum likelihood" of
        # the Wikipedia article on the Beta distribution for the formulas.)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        # Normalize the data to the interval [0, 1].
        data = (np.ravel(data) - floc) / fscale
        # The log terms below need every point strictly inside (0, 1).
        if np.any(data <= 0) or np.any(data >= 1):
            raise FitDataError("beta", lower=floc, upper=floc + fscale)

        xbar = data.mean()

        if f0 is not None or f1 is not None:
            # One of the shape parameters is fixed.

            if f0 is not None:
                # The shape parameter a is fixed, so swap the parameters
                # and flip the data.  We always solve for `a`.  The result
                # will be swapped back before returning.
                b = f0
                data = 1 - data
                xbar = 1 - xbar
            else:
                b = f1

            # Initial guess for a.  Use the formula for the mean of the beta
            # distribution, E[x] = a / (a + b), to generate a reasonable
            # starting point based on the mean of the data and the given
            # value of b.
            a = b * xbar / (1 - xbar)

            # Compute the MLE for `a` by solving _beta_mle_a.
            theta, info, ier, mesg = optimize.fsolve(
                _beta_mle_a, a,
                args=(b, len(data), np.log(data).sum()),
                full_output=True
            )
            if ier != 1:
                raise FitSolverError(mesg=mesg)
            a = theta[0]

            if f0 is not None:
                # The shape parameter a was fixed, so swap back the
                # parameters.
                a, b = b, a

        else:
            # Neither of the shape parameters is fixed.

            # s1 and s2 are used in the extra arguments passed to _beta_mle_ab
            # by optimize.fsolve.
            s1 = np.log(data).sum()
            s2 = sc.log1p(-data).sum()

            # Use the "method of moments" to estimate the initial
            # guess for a and b.
            fac = xbar * (1 - xbar) / data.var(ddof=0) - 1
            a = xbar * fac
            b = (1 - xbar) * fac

            # Compute the MLE for a and b by solving _beta_mle_ab.
            theta, info, ier, mesg = optimize.fsolve(
                _beta_mle_ab, [a, b],
                args=(len(data), s1, s2),
                full_output=True
            )
            if ier != 1:
                raise FitSolverError(mesg=mesg)
            a, b = theta

        return a, b, floc, fscale

    def _entropy(self, a, b):
        # The entropy is evaluated by one of four formulas, selected
        # elementwise by the magnitudes of `a` and `b` (see the conditions
        # passed to `_lazyselect` at the end of this method).

        def regular(a, b):
            # Formula in terms of log-beta and digamma functions.
            return (sc.betaln(a, b) - (a - 1) * sc.psi(a) -
                    (b - 1) * sc.psi(b) + (a + b - 2) * sc.psi(a + b))

        def asymptotic_ab_large(a, b):
            # Expansion used when both a and b are large.
            sum_ab = a + b
            log_term = 0.5 * (
                np.log(2*np.pi) + np.log(a) + np.log(b) - 3*np.log(sum_ab) + 1
            )
            t1 = 110/sum_ab + 20*sum_ab**-2.0 + sum_ab**-3.0 - 2*sum_ab**-4.0
            t2 = -50/a - 10*a**-2.0 - a**-3.0 + a**-4.0
            t3 = -50/b - 10*b**-2.0 - b**-3.0 + b**-4.0
            return log_term + (t1 + t2 + t3) / 120

        def asymptotic_b_large(a, b):
            # Expansion used when b is large relative to a.
            sum_ab = a + b
            t1 = sc.gammaln(a) - (a - 1) * sc.psi(a)
            t2 = (
                - 1/(2*b) + 1/(12*b) - b**-2.0/12 - b**-3.0/120 + b**-4.0/120
                + b**-5.0/252 - b**-6.0/252 + 1/sum_ab - 1/(12*sum_ab)
                + sum_ab**-2.0/6 + sum_ab**-3.0/120 - sum_ab**-4.0/60
                - sum_ab**-5.0/252 + sum_ab**-6.0/126
            )
            log_term = sum_ab*np.log1p(a/b) + np.log(b) - 2*np.log(sum_ab)
            return t1 + t2 + log_term

        def asymptotic_a_large(a, b):
            # Reuse the large-b expansion with the parameters exchanged.
            return asymptotic_b_large(b, a)

        def threshold_large(v):
            # Threshold for the other parameter: with v = m * 10**j
            # (1 <= m < 10) this is (floor(m) + 2) * 10**(7 + j), except
            # that v == 1.0 maps to the fixed value 1000.
            j = np.floor(np.log10(v))
            d = np.floor(v / 10 ** j) + 2
            return xpx.apply_where(v != 1.0, (d, j), lambda d_, j_: d_ * 10**(7 + j_),
                                   fill_value=1000)

        threshold_a = threshold_large(a)
        threshold_b = threshold_large(b)
        # NOTE(review): the four conditions below do not obviously cover every
        # (a, b) pair (e.g. values between 4.9e6 and 4.96e6); confirm what
        # `_lazyselect` returns where no condition holds.
        return _lazyselect([(a >= 4.96e6) & (b >= 4.96e6),
                            (a <= 4.9e6) & (b - a >= 1e6) & (b >= threshold_a),
                            (b <= 4.9e6) & (a - b >= 1e6) & (a >= threshold_b),
                            (a < 4.9e6) & (b < 4.9e6)
                            ],
                           [asymptotic_ab_large, asymptotic_b_large,
                            asymptotic_a_large, regular],
                           [a, b]
                           )


beta = beta_gen(a=0.0, b=1.0, name='beta')
  709. class betaprime_gen(rv_continuous):
  710. r"""A beta prime continuous random variable.
  711. %(before_notes)s
  712. Notes
  713. -----
  714. The probability density function for `betaprime` is:
  715. .. math::
  716. f(x, a, b) = \frac{x^{a-1} (1+x)^{-a-b}}{\beta(a, b)}
  717. for :math:`x >= 0`, :math:`a > 0`, :math:`b > 0`, where
  718. :math:`\beta(a, b)` is the beta function (see `scipy.special.beta`).
  719. `betaprime` takes ``a`` and ``b`` as shape parameters.
  720. The distribution is related to the `beta` distribution as follows:
  721. If :math:`X` follows a beta distribution with parameters :math:`a, b`,
  722. then :math:`Y = X/(1-X)` has a beta prime distribution with
  723. parameters :math:`a, b` ([1]_).
  724. The beta prime distribution is a reparametrized version of the
  725. F distribution. The beta prime distribution with shape parameters
  726. ``a`` and ``b`` and ``scale = s`` is equivalent to the F distribution
  727. with parameters ``d1 = 2*a``, ``d2 = 2*b`` and ``scale = (a/b)*s``.
  728. For example,
  729. >>> from scipy.stats import betaprime, f
  730. >>> x = [1, 2, 5, 10]
  731. >>> a = 12
  732. >>> b = 5
  733. >>> betaprime.pdf(x, a, b, scale=2)
  734. array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
  735. >>> f.pdf(x, 2*a, 2*b, scale=(a/b)*2)
  736. array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
  737. %(after_notes)s
  738. References
  739. ----------
  740. .. [1] Beta prime distribution, Wikipedia,
  741. https://en.wikipedia.org/wiki/Beta_prime_distribution
  742. %(example)s
  743. """
  744. _support_mask = rv_continuous._open_support_mask
  745. def _shape_info(self):
  746. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  747. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  748. return [ia, ib]
  749. def _rvs(self, a, b, size=None, random_state=None):
  750. u1 = gamma.rvs(a, size=size, random_state=random_state)
  751. u2 = gamma.rvs(b, size=size, random_state=random_state)
  752. return u1 / u2
  753. def _pdf(self, x, a, b):
  754. # betaprime.pdf(x, a, b) = x**(a-1) * (1+x)**(-a-b) / beta(a, b)
  755. return np.exp(self._logpdf(x, a, b))
  756. def _logpdf(self, x, a, b):
  757. return sc.xlogy(a - 1.0, x) - sc.xlog1py(a + b, x) - sc.betaln(a, b)
  758. def _cdf(self, x, a, b):
  759. # note: f2 is the direct way to compute the cdf if the relationship
  760. # to the beta distribution is used.
  761. # however, for very large x, x/(1+x) == 1. since the distribution
  762. # has very fat tails if b is small, this can cause inaccurate results
  763. # use the following relationship of the incomplete beta function:
  764. # betainc(x, a, b) = 1 - betainc(1-x, b, a)
  765. # see gh-17631
  766. return xpx.apply_where(
  767. x > 1, (x, a, b),
  768. lambda x_, a_, b_: beta._sf(1 / (1 + x_), b_, a_),
  769. lambda x_, a_, b_: beta._cdf(x_ / (1 + x_), a_, b_))
  770. def _sf(self, x, a, b):
  771. return xpx.apply_where(
  772. x > 1, (x, a, b),
  773. lambda x_, a_, b_: beta._cdf(1 / (1 + x_), b_, a_),
  774. lambda x_, a_, b_: beta._sf(x_ / (1 + x_), a_, b_))
  775. def _ppf(self, p, a, b):
  776. p, a, b = np.broadcast_arrays(p, a, b)
  777. # By default, compute the ppf by solving the following:
  778. # p = beta._cdf(x/(1+x), a, b). This implies x = r/(1-r) with
  779. # r = beta._ppf(p, a, b). This can cause numerical issues if r is
  780. # very close to 1. In that case, invert the alternative expression of
  781. # the cdf: p = beta._sf(1/(1+x), b, a).
  782. r = stats.beta._ppf(p, a, b)
  783. with np.errstate(divide='ignore'):
  784. out = r / (1 - r)
  785. rnear1 = r > 0.9999
  786. if np.isscalar(r):
  787. if rnear1:
  788. out = 1/stats.beta._isf(p, b, a) - 1
  789. else:
  790. out[rnear1] = 1/stats.beta._isf(p[rnear1], b[rnear1], a[rnear1]) - 1
  791. return out
  792. def _munp(self, n, a, b):
  793. return xpx.apply_where(
  794. b > n, (a, b),
  795. lambda a, b: np.prod([(a+i-1)/(b-i) for i in range(1, int(n)+1)], axis=0),
  796. fill_value=np.inf)
  797. betaprime = betaprime_gen(a=0.0, name='betaprime')
  798. class bradford_gen(rv_continuous):
  799. r"""A Bradford continuous random variable.
  800. %(before_notes)s
  801. Notes
  802. -----
  803. The probability density function for `bradford` is:
  804. .. math::
  805. f(x, c) = \frac{c}{\log(1+c) (1+cx)}
  806. for :math:`0 <= x <= 1` and :math:`c > 0`.
  807. `bradford` takes ``c`` as a shape parameter for :math:`c`.
  808. %(after_notes)s
  809. %(example)s
  810. """
  811. def _shape_info(self):
  812. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  813. def _pdf(self, x, c):
  814. # bradford.pdf(x, c) = c / (k * (1+c*x))
  815. return c / (c*x + 1.0) / sc.log1p(c)
  816. def _cdf(self, x, c):
  817. return sc.log1p(c*x) / sc.log1p(c)
  818. def _ppf(self, q, c):
  819. return sc.expm1(q * sc.log1p(c)) / c
  820. def _stats(self, c, moments='mv'):
  821. k = np.log(1.0+c)
  822. mu = (c-k)/(c*k)
  823. mu2 = ((c+2.0)*k-2.0*c)/(2*c*k*k)
  824. g1 = None
  825. g2 = None
  826. if 's' in moments:
  827. g1 = np.sqrt(2)*(12*c*c-9*c*k*(c+2)+2*k*k*(c*(c+3)+3))
  828. g1 /= np.sqrt(c*(c*(k-2)+2*k))*(3*c*(k-2)+6*k)
  829. if 'k' in moments:
  830. g2 = (c**3*(k-3)*(k*(3*k-16)+24)+12*k*c*c*(k-4)*(k-3) +
  831. 6*c*k*k*(3*k-14) + 12*k**3)
  832. g2 /= 3*c*(c*(k-2)+2*k)**2
  833. return mu, mu2, g1, g2
  834. def _entropy(self, c):
  835. k = np.log(1+c)
  836. return k/2.0 - np.log(c/k)
  837. bradford = bradford_gen(a=0.0, b=1.0, name='bradford')
  838. class burr_gen(rv_continuous):
  839. r"""A Burr (Type III) continuous random variable.
  840. %(before_notes)s
  841. See Also
  842. --------
  843. fisk : a special case of either `burr` or `burr12` with ``d=1``
  844. burr12 : Burr Type XII distribution
  845. mielke : Mielke Beta-Kappa / Dagum distribution
  846. Notes
  847. -----
  848. The probability density function for `burr` is:
  849. .. math::
  850. f(x; c, d) = c d \frac{x^{-c - 1}}
  851. {{(1 + x^{-c})}^{d + 1}}
  852. for :math:`x >= 0` and :math:`c, d > 0`.
  853. `burr` takes ``c`` and ``d`` as shape parameters for :math:`c` and
  854. :math:`d`.
  855. This is the PDF corresponding to the third CDF given in Burr's list;
  856. specifically, it is equation (11) in Burr's paper [1]_. The distribution
  857. is also commonly referred to as the Dagum distribution [2]_. If the
  858. parameter :math:`c < 1` then the mean of the distribution does not
  859. exist and if :math:`c < 2` the variance does not exist [2]_.
  860. The PDF is finite at the left endpoint :math:`x = 0` if :math:`c * d >= 1`.
  861. %(after_notes)s
  862. References
  863. ----------
  864. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  865. Mathematical Statistics, 13(2), pp 215-232 (1942).
  866. .. [2] https://en.wikipedia.org/wiki/Dagum_distribution
  867. .. [3] Kleiber, Christian. "A guide to the Dagum distributions."
  868. Modeling Income Distributions and Lorenz Curves pp 97-117 (2008).
  869. %(example)s
  870. """
  871. # Do not set _support_mask to rv_continuous._open_support_mask
  872. # Whether the left-hand endpoint is suitable for pdf evaluation is dependent
  873. # on the values of c and d: if c*d >= 1, the pdf is finite, otherwise infinite.
  874. def _shape_info(self):
  875. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  876. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  877. return [ic, id]
  878. def _pdf(self, x, c, d):
  879. # burr.pdf(x, c, d) = c * d * x**(-c-1) * (1+x**(-c))**(-d-1)
  880. output = xpx.apply_where(
  881. x == 0, (x, c, d),
  882. lambda x_, c_, d_: c_ * d_ * (x_**(c_*d_-1)) / (1 + x_**c_),
  883. lambda x_, c_, d_: (c_ * d_ * (x_ ** (-c_ - 1.0)) /
  884. ((1 + x_ ** (-c_)) ** (d_ + 1.0))))
  885. return output[()] if output.ndim == 0 else output
  886. def _logpdf(self, x, c, d):
  887. output = xpx.apply_where(
  888. x == 0, (x, c, d),
  889. lambda x_, c_, d_: (np.log(c_) + np.log(d_) + sc.xlogy(c_*d_ - 1, x_)
  890. - (d_+1) * sc.log1p(x_**(c_))),
  891. lambda x_, c_, d_: (np.log(c_) + np.log(d_)
  892. + sc.xlogy(-c_ - 1, x_)
  893. - sc.xlog1py(d_+1, x_**(-c_))))
  894. return output[()] if output.ndim == 0 else output
  895. def _cdf(self, x, c, d):
  896. return (1 + x**(-c))**(-d)
  897. def _logcdf(self, x, c, d):
  898. return sc.log1p(x**(-c)) * (-d)
  899. def _sf(self, x, c, d):
  900. return np.exp(self._logsf(x, c, d))
  901. def _logsf(self, x, c, d):
  902. return np.log1p(- (1 + x**(-c))**(-d))
  903. def _ppf(self, q, c, d):
  904. return (q**(-1.0/d) - 1)**(-1.0/c)
  905. def _isf(self, q, c, d):
  906. _q = sc.xlog1py(-1.0 / d, -q)
  907. return sc.expm1(_q) ** (-1.0 / c)
  908. def _stats(self, c, d):
  909. nc = np.arange(1, 5).reshape(4,1) / c
  910. # ek is the kth raw moment, e1 is the mean e2-e1**2 variance etc.
  911. e1, e2, e3, e4 = sc.beta(d + nc, 1. - nc) * d
  912. mu = np.where(c > 1.0, e1, np.nan)
  913. mu2_if_c = e2 - mu**2
  914. mu2 = np.where(c > 2.0, mu2_if_c, np.nan)
  915. g1 = xpx.apply_where(
  916. c > 3.0, (e1, e2, e3, mu2_if_c),
  917. lambda e1, e2, e3, mu2_if_c: ((e3 - 3*e2*e1 + 2*e1**3)
  918. / np.sqrt((mu2_if_c)**3)),
  919. fill_value=np.nan)
  920. g2 = xpx.apply_where(
  921. c > 4.0, (e1, e2, e3, e4, mu2_if_c),
  922. lambda e1, e2, e3, e4, mu2_if_c: (
  923. ((e4 - 4*e3*e1 + 6*e2*e1**2 - 3*e1**4) / mu2_if_c**2) - 3),
  924. fill_value=np.nan)
  925. if np.ndim(c) == 0:
  926. return mu.item(), mu2.item(), g1.item(), g2.item()
  927. return mu, mu2, g1, g2
  928. def _munp(self, n, c, d):
  929. def __munp(n, c, d):
  930. nc = 1. * n / c
  931. return d * sc.beta(1.0 - nc, d + nc)
  932. n, c, d = np.asarray(n), np.asarray(c), np.asarray(d)
  933. return xpx.apply_where((c > n) & (n == n) & (d == d),
  934. (n, c, d), __munp, fill_value=np.nan)
  935. burr = burr_gen(a=0.0, name='burr')
  936. class burr12_gen(rv_continuous):
  937. r"""A Burr (Type XII) continuous random variable.
  938. %(before_notes)s
  939. See Also
  940. --------
  941. fisk : a special case of either `burr` or `burr12` with ``d=1``
  942. burr : Burr Type III distribution
  943. Notes
  944. -----
  945. The probability density function for `burr12` is:
  946. .. math::
  947. f(x; c, d) = c d \frac{x^{c-1}}
  948. {(1 + x^c)^{d + 1}}
  949. for :math:`x >= 0` and :math:`c, d > 0`.
  950. `burr12` takes ``c`` and ``d`` as shape parameters for :math:`c`
  951. and :math:`d`.
  952. This is the PDF corresponding to the twelfth CDF given in Burr's list;
  953. specifically, it is equation (20) in Burr's paper [1]_.
  954. %(after_notes)s
  955. The Burr type 12 distribution is also sometimes referred to as
  956. the Singh-Maddala distribution from NIST [2]_.
  957. References
  958. ----------
  959. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  960. Mathematical Statistics, 13(2), pp 215-232 (1942).
  961. .. [2] https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/b12pdf.htm
  962. .. [3] "Burr distribution",
  963. https://en.wikipedia.org/wiki/Burr_distribution
  964. %(example)s
  965. """
  966. def _shape_info(self):
  967. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  968. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  969. return [ic, id]
  970. def _pdf(self, x, c, d):
  971. # burr12.pdf(x, c, d) = c * d * x**(c-1) * (1+x**(c))**(-d-1)
  972. return np.exp(self._logpdf(x, c, d))
  973. def _logpdf(self, x, c, d):
  974. return np.log(c) + np.log(d) + sc.xlogy(c - 1, x) + sc.xlog1py(-d-1, x**c)
  975. def _cdf(self, x, c, d):
  976. return -sc.expm1(self._logsf(x, c, d))
  977. def _logcdf(self, x, c, d):
  978. return sc.log1p(-(1 + x**c)**(-d))
  979. def _sf(self, x, c, d):
  980. return np.exp(self._logsf(x, c, d))
  981. def _logsf(self, x, c, d):
  982. return sc.xlog1py(-d, x**c)
  983. def _ppf(self, q, c, d):
  984. # The following is an implementation of
  985. # ((1 - q)**(-1.0/d) - 1)**(1.0/c)
  986. # that does a better job handling small values of q.
  987. return sc.expm1(-1/d * sc.log1p(-q))**(1/c)
  988. def _isf(self, p, c, d):
  989. return sc.expm1(-1/d * np.log(p))**(1/c)
  990. def _munp(self, n, c, d):
  991. def moment_if_exists(n, c, d):
  992. nc = 1. * n / c
  993. return d * sc.beta(1.0 + nc, d - nc)
  994. return xpx.apply_where(c * d > n, (n, c, d), moment_if_exists,
  995. fill_value=np.nan)
  996. burr12 = burr12_gen(a=0.0, name='burr12')
class fisk_gen(burr_gen):
    r"""A Fisk continuous random variable.

    The Fisk distribution is also known as the log-logistic distribution.

    %(before_notes)s

    See Also
    --------
    burr

    Notes
    -----
    The probability density function for `fisk` is:

    .. math::

        f(x, c) = \frac{c x^{c-1}}
                       {(1 + x^c)^2}

    for :math:`x >= 0` and :math:`c > 0`.

    Please note that the above expression can be transformed into the following
    one, which is also commonly used:

    .. math::

        f(x, c) = \frac{c x^{-c-1}}
                       {(1 + x^{-c})^2}

    `fisk` takes ``c`` as a shape parameter for :math:`c`.

    `fisk` is a special case of `burr` or `burr12` with ``d=1``.

    Suppose ``X`` is a logistic random variable with location ``l``
    and scale ``s``. Then ``Y = exp(X)`` is a Fisk (log-logistic)
    random variable with ``scale = exp(l)`` and shape ``c = 1/s``.

    %(after_notes)s

    %(example)s

    """
    # Apart from `_shape_info` and `_entropy`, every method below delegates
    # to the module-level `burr` instance with the second shape parameter
    # fixed at d = 1.0.

    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    def _pdf(self, x, c):
        # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
        return burr._pdf(x, c, 1.0)

    def _cdf(self, x, c):
        return burr._cdf(x, c, 1.0)

    def _sf(self, x, c):
        return burr._sf(x, c, 1.0)

    def _logpdf(self, x, c):
        # fisk.logpdf(x, c) = log(c * x**(-c-1) * (1 + x**(-c))**(-2))
        return burr._logpdf(x, c, 1.0)

    def _logcdf(self, x, c):
        return burr._logcdf(x, c, 1.0)

    def _logsf(self, x, c):
        return burr._logsf(x, c, 1.0)

    def _ppf(self, x, c):
        # Note: the first argument is a probability despite being named `x`.
        return burr._ppf(x, c, 1.0)

    def _isf(self, q, c):
        return burr._isf(q, c, 1.0)

    def _munp(self, n, c):
        return burr._munp(n, c, 1.0)

    def _stats(self, c):
        return burr._stats(c, 1.0)

    def _entropy(self, c):
        # Closed form; not delegated to `burr`.
        return 2 - np.log(c)


fisk = fisk_gen(a=0.0, name='fisk')
  1051. class cauchy_gen(rv_continuous):
  1052. r"""A Cauchy continuous random variable.
  1053. %(before_notes)s
  1054. Notes
  1055. -----
  1056. The probability density function for `cauchy` is
  1057. .. math::
  1058. f(x) = \frac{1}{\pi (1 + x^2)}
  1059. for a real number :math:`x`.
  1060. This distribution uses routines from the Boost Math C++ library for
  1061. the computation of the ``ppf`` and ``isf`` methods. [1]_
  1062. %(after_notes)s
  1063. References
  1064. ----------
  1065. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  1066. %(example)s
  1067. """
  1068. def _shape_info(self):
  1069. return []
  1070. def _pdf(self, x):
  1071. # cauchy.pdf(x) = 1 / (pi * (1 + x**2))
  1072. with np.errstate(over='ignore'):
  1073. return 1.0/np.pi/(1.0+x*x)
  1074. def _logpdf(self, x):
  1075. # The formulas
  1076. # log(1/(pi*(1 + x**2))) = -log(pi) - log(1 + x**2)
  1077. # = -log(pi) - log(x**2*(1 + 1/x**2))
  1078. # = -log(pi) - (2log(|x|) + log1p(1/x**2))
  1079. # are used here.
  1080. absx = np.abs(x)
  1081. # In the following apply_where, `f1` provides better precision than `f2`
  1082. # for small and moderate x, while `f2` avoids the overflow that can
  1083. # occur with absx**2.
  1084. return xpx.apply_where(
  1085. absx < 1, absx,
  1086. lambda absx: -_LOG_PI - np.log1p(absx**2),
  1087. lambda absx: (-_LOG_PI - (2*np.log(absx) + np.log1p((1/absx)**2))))
  1088. def _cdf(self, x):
  1089. return np.arctan2(1, -x)/np.pi
  1090. def _ppf(self, q):
  1091. return scu._cauchy_ppf(q, 0, 1)
  1092. def _sf(self, x):
  1093. return np.arctan2(1, x)/np.pi
  1094. def _isf(self, q):
  1095. return scu._cauchy_isf(q, 0, 1)
  1096. def _stats(self):
  1097. return np.nan, np.nan, np.nan, np.nan
  1098. def _entropy(self):
  1099. return np.log(4*np.pi)
  1100. def _fitstart(self, data, args=None):
  1101. # Initialize ML guesses using quartiles instead of moments.
  1102. if isinstance(data, CensoredData):
  1103. data = data._uncensor()
  1104. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  1105. return p50, (p75 - p25)/2
  1106. cauchy = cauchy_gen(name='cauchy')
  1107. class chi_gen(rv_continuous):
  1108. r"""A chi continuous random variable.
  1109. %(before_notes)s
  1110. Notes
  1111. -----
  1112. The probability density function for `chi` is:
  1113. .. math::
  1114. f(x, k) = \frac{1}{2^{k/2-1} \Gamma \left( k/2 \right)}
  1115. x^{k-1} \exp \left( -x^2/2 \right)
  1116. for :math:`x >= 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  1117. in the implementation). :math:`\Gamma` is the gamma function
  1118. (`scipy.special.gamma`).
  1119. Special cases of `chi` are:
  1120. - ``chi(1, loc, scale)`` is equivalent to `halfnorm`
  1121. - ``chi(2, 0, scale)`` is equivalent to `rayleigh`
  1122. - ``chi(3, 0, scale)`` is equivalent to `maxwell`
  1123. `chi` takes ``df`` as a shape parameter.
  1124. %(after_notes)s
  1125. %(example)s
  1126. """
  1127. def _shape_info(self):
  1128. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  1129. def _rvs(self, df, size=None, random_state=None):
  1130. return np.sqrt(chi2.rvs(df, size=size, random_state=random_state))
  1131. def _pdf(self, x, df):
  1132. # x**(df-1) * exp(-x**2/2)
  1133. # chi.pdf(x, df) = -------------------------
  1134. # 2**(df/2-1) * gamma(df/2)
  1135. return np.exp(self._logpdf(x, df))
  1136. def _logpdf(self, x, df):
  1137. l = np.log(2) - .5*np.log(2)*df - sc.gammaln(.5*df)
  1138. return l + sc.xlogy(df - 1., x) - .5*x**2
  1139. def _cdf(self, x, df):
  1140. return sc.gammainc(.5*df, .5*x**2)
  1141. def _sf(self, x, df):
  1142. return sc.gammaincc(.5*df, .5*x**2)
  1143. def _ppf(self, q, df):
  1144. return np.sqrt(2*sc.gammaincinv(.5*df, q))
  1145. def _isf(self, q, df):
  1146. return np.sqrt(2*sc.gammainccinv(.5*df, q))
  1147. def _stats(self, df):
  1148. # poch(df/2, 1/2) = gamma(df/2 + 1/2) / gamma(df/2)
  1149. mu = np.sqrt(2) * sc.poch(0.5 * df, 0.5)
  1150. mu2 = df - mu*mu
  1151. g1 = (2*mu**3.0 + mu*(1-2*df))/np.asarray(np.power(mu2, 1.5))
  1152. g2 = 2*df*(1.0-df)-6*mu**4 + 4*mu**2 * (2*df-1)
  1153. g2 /= np.asarray(mu2**2.0)
  1154. return mu, mu2, g1, g2
  1155. def _entropy(self, df):
  1156. def regular_formula(df):
  1157. return (sc.gammaln(.5 * df)
  1158. + 0.5 * (df - np.log(2) - (df - 1) * sc.digamma(0.5 * df)))
  1159. def asymptotic_formula(df):
  1160. return (0.5 + np.log(np.pi)/2 - (df**-1)/6 - (df**-2)/6
  1161. - 4/45*(df**-3) + (df**-4)/15)
  1162. return xpx.apply_where(df < 300, df, regular_formula, asymptotic_formula)
  1163. chi = chi_gen(a=0.0, name='chi')
  1164. class chi2_gen(rv_continuous):
  1165. r"""A chi-squared continuous random variable.
  1166. For the noncentral chi-square distribution, see `ncx2`.
  1167. %(before_notes)s
  1168. See Also
  1169. --------
  1170. ncx2
  1171. Notes
  1172. -----
  1173. The probability density function for `chi2` is:
  1174. .. math::
  1175. f(x, k) = \frac{1}{2^{k/2} \Gamma \left( k/2 \right)}
  1176. x^{k/2-1} \exp \left( -x/2 \right)
  1177. for :math:`x > 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  1178. in the implementation).
  1179. `chi2` takes ``df`` as a shape parameter.
  1180. The chi-squared distribution is a special case of the gamma
  1181. distribution, with gamma parameters ``a = df/2``, ``loc = 0`` and
  1182. ``scale = 2``.
  1183. %(after_notes)s
  1184. %(example)s
  1185. """
  1186. def _shape_info(self):
  1187. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  1188. def _rvs(self, df, size=None, random_state=None):
  1189. return random_state.chisquare(df, size)
  1190. def _pdf(self, x, df):
  1191. # chi2.pdf(x, df) = 1 / (2*gamma(df/2)) * (x/2)**(df/2-1) * exp(-x/2)
  1192. return np.exp(self._logpdf(x, df))
  1193. def _logpdf(self, x, df):
  1194. return sc.xlogy(df/2.-1, x) - x/2. - sc.gammaln(df/2.) - (np.log(2)*df)/2.
  1195. def _cdf(self, x, df):
  1196. return sc.chdtr(df, x)
  1197. def _sf(self, x, df):
  1198. return sc.chdtrc(df, x)
  1199. def _isf(self, p, df):
  1200. return sc.chdtri(df, p)
  1201. def _ppf(self, p, df):
  1202. return 2*sc.gammaincinv(df/2, p)
  1203. def _stats(self, df):
  1204. mu = df
  1205. mu2 = 2*df
  1206. g1 = 2*np.sqrt(2.0/df)
  1207. g2 = 12.0/df
  1208. return mu, mu2, g1, g2
  1209. def _entropy(self, df):
  1210. half_df = 0.5 * df
  1211. def regular_formula(half_df):
  1212. return (half_df + np.log(2) + sc.gammaln(half_df) +
  1213. (1 - half_df) * sc.psi(half_df))
  1214. def asymptotic_formula(half_df):
  1215. # plug in the above formula the following asymptotic
  1216. # expansions:
  1217. # ln(gamma(a)) ~ (a - 0.5) * ln(a) - a + 0.5 * ln(2 * pi) +
  1218. # 1/(12 * a) - 1/(360 * a**3)
  1219. # psi(a) ~ ln(a) - 1/(2 * a) - 1/(3 * a**2) + 1/120 * a**4)
  1220. c = np.log(2) + 0.5*(1 + np.log(2*np.pi))
  1221. h = 0.5/half_df
  1222. return (h*(-2/3 + h*(-1/3 + h*(-4/45 + h/7.5))) +
  1223. 0.5*np.log(half_df) + c)
  1224. return xpx.apply_where(half_df < 125, half_df,
  1225. regular_formula, asymptotic_formula)
  1226. chi2 = chi2_gen(a=0.0, name='chi2')
  1227. class cosine_gen(rv_continuous):
  1228. r"""A cosine continuous random variable.
  1229. %(before_notes)s
  1230. Notes
  1231. -----
  1232. The cosine distribution is an approximation to the normal distribution.
  1233. The probability density function for `cosine` is:
  1234. .. math::
  1235. f(x) = \frac{1}{2\pi} (1+\cos(x))
  1236. for :math:`-\pi \le x \le \pi`.
  1237. %(after_notes)s
  1238. %(example)s
  1239. """
  1240. def _shape_info(self):
  1241. return []
  1242. def _pdf(self, x):
  1243. # cosine.pdf(x) = 1/(2*pi) * (1+cos(x))
  1244. return 1.0/2/np.pi*(1+np.cos(x))
  1245. def _logpdf(self, x):
  1246. c = np.cos(x)
  1247. return xpx.apply_where(c != -1, c,
  1248. lambda c: np.log1p(c) - np.log(2*np.pi),
  1249. fill_value=-np.inf)
  1250. def _cdf(self, x):
  1251. return scu._cosine_cdf(x)
  1252. def _sf(self, x):
  1253. return scu._cosine_cdf(-x)
  1254. def _ppf(self, p):
  1255. return scu._cosine_invcdf(p)
  1256. def _isf(self, p):
  1257. return -scu._cosine_invcdf(p)
  1258. def _stats(self):
  1259. v = (np.pi * np.pi / 3.0) - 2.0
  1260. k = -6.0 * (np.pi**4 - 90) / (5.0 * (np.pi * np.pi - 6)**2)
  1261. return 0.0, v, 0.0, k
  1262. def _entropy(self):
  1263. return np.log(4*np.pi)-1.0
  1264. cosine = cosine_gen(a=-np.pi, b=np.pi, name='cosine')
  1265. class dgamma_gen(rv_continuous):
  1266. r"""A double gamma continuous random variable.
  1267. The double gamma distribution is also known as the reflected gamma
  1268. distribution [1]_.
  1269. %(before_notes)s
  1270. Notes
  1271. -----
  1272. The probability density function for `dgamma` is:
  1273. .. math::
  1274. f(x, a) = \frac{1}{2\Gamma(a)} |x|^{a-1} \exp(-|x|)
  1275. for a real number :math:`x` and :math:`a > 0`. :math:`\Gamma` is the
  1276. gamma function (`scipy.special.gamma`).
  1277. `dgamma` takes ``a`` as a shape parameter for :math:`a`.
  1278. %(after_notes)s
  1279. References
  1280. ----------
  1281. .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
  1282. Distributions, Volume 1", Second Edition, John Wiley and Sons
  1283. (1994).
  1284. %(example)s
  1285. """
  1286. def _shape_info(self):
  1287. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  1288. def _rvs(self, a, size=None, random_state=None):
  1289. u = random_state.uniform(size=size)
  1290. gm = gamma.rvs(a, size=size, random_state=random_state)
  1291. return gm * np.where(u >= 0.5, 1, -1)
  1292. def _pdf(self, x, a):
  1293. # dgamma.pdf(x, a) = 1 / (2*gamma(a)) * abs(x)**(a-1) * exp(-abs(x))
  1294. ax = abs(x)
  1295. return 1.0/(2*sc.gamma(a))*ax**(a-1.0) * np.exp(-ax)
  1296. def _logpdf(self, x, a):
  1297. ax = abs(x)
  1298. return sc.xlogy(a - 1.0, ax) - ax - np.log(2) - sc.gammaln(a)
  1299. def _cdf(self, x, a):
  1300. return np.where(x > 0,
  1301. 0.5 + 0.5*sc.gammainc(a, x),
  1302. 0.5*sc.gammaincc(a, -x))
  1303. def _sf(self, x, a):
  1304. return np.where(x > 0,
  1305. 0.5*sc.gammaincc(a, x),
  1306. 0.5 + 0.5*sc.gammainc(a, -x))
  1307. def _entropy(self, a):
  1308. return stats.gamma._entropy(a) - np.log(0.5)
  1309. def _ppf(self, q, a):
  1310. return np.where(q > 0.5,
  1311. sc.gammaincinv(a, 2*q - 1),
  1312. -sc.gammainccinv(a, 2*q))
  1313. def _isf(self, q, a):
  1314. return np.where(q > 0.5,
  1315. -sc.gammaincinv(a, 2*q - 1),
  1316. sc.gammainccinv(a, 2*q))
  1317. def _stats(self, a):
  1318. mu2 = a*(a+1.0)
  1319. return 0.0, mu2, 0.0, (a+2.0)*(a+3.0)/mu2-3.0
  1320. dgamma = dgamma_gen(name='dgamma')
  1321. class dpareto_lognorm_gen(rv_continuous):
  1322. r"""A double Pareto lognormal continuous random variable.
  1323. %(before_notes)s
  1324. Notes
  1325. -----
  1326. The probability density function for `dpareto_lognorm` is:
  1327. .. math::
  1328. f(x, \mu, \sigma, \alpha, \beta) =
  1329. \frac{\alpha \beta}{(\alpha + \beta) x}
  1330. \phi\left( \frac{\log x - \mu}{\sigma} \right)
  1331. \left( R(y_1) + R(y_2) \right)
  1332. where :math:`R(t) = \frac{1 - \Phi(t)}{\phi(t)}`,
  1333. :math:`\phi` and :math:`\Phi` are the normal PDF and CDF, respectively,
  1334. :math:`y_1 = \alpha \sigma - \frac{\log x - \mu}{\sigma}`,
  1335. and :math:`y_2 = \beta \sigma + \frac{\log x - \mu}{\sigma}`
  1336. for real numbers :math:`x` and :math:`\mu`, :math:`\sigma > 0`,
  1337. :math:`\alpha > 0`, and :math:`\beta > 0` [1]_.
  1338. `dpareto_lognorm` takes
  1339. ``u`` as a shape parameter for :math:`\mu`,
  1340. ``s`` as a shape parameter for :math:`\sigma`,
  1341. ``a`` as a shape parameter for :math:`\alpha`, and
  1342. ``b`` as a shape parameter for :math:`\beta`.
  1343. A random variable :math:`X` distributed according to the PDF above
  1344. can be represented as :math:`X = U \frac{V_1}{V_2}` where :math:`U`,
  1345. :math:`V_1`, and :math:`V_2` are independent, :math:`U` is lognormally
  1346. distributed such that :math:`\log U \sim N(\mu, \sigma^2)`, and
  1347. :math:`V_1` and :math:`V_2` follow Pareto distributions with parameters
  1348. :math:`\alpha` and :math:`\beta`, respectively [2]_.
  1349. %(after_notes)s
  1350. References
  1351. ----------
  1352. .. [1] Hajargasht, Gholamreza, and William E. Griffiths. "Pareto-lognormal
  1353. distributions: Inequality, poverty, and estimation from grouped income
  1354. data." Economic Modelling 33 (2013): 593-604.
  1355. .. [2] Reed, William J., and Murray Jorgensen. "The double Pareto-lognormal
  1356. distribution - a new parametric model for size distributions."
  1357. Communications in Statistics - Theory and Methods 33.8 (2004): 1733-1753.
  1358. %(example)s
  1359. """
  1360. _logphi = norm._logpdf
  1361. _logPhi = norm._logcdf
  1362. _logPhic = norm._logsf
  1363. _phi = norm._pdf
  1364. _Phi = norm._cdf
  1365. _Phic = norm._sf
  1366. def _R(self, z):
  1367. return self._Phic(z) / self._phi(z)
  1368. def _logR(self, z):
  1369. return self._logPhic(z) - self._logphi(z)
  1370. def _shape_info(self):
  1371. return [_ShapeInfo("u", False, (-np.inf, np.inf), (False, False)),
  1372. _ShapeInfo("s", False, (0, np.inf), (False, False)),
  1373. _ShapeInfo("a", False, (0, np.inf), (False, False)),
  1374. _ShapeInfo("b", False, (0, np.inf), (False, False))]
  1375. def _argcheck(self, u, s, a, b):
  1376. return (s > 0) & (a > 0) & (b > 0)
  1377. def _rvs(self, u, s, a, b, size=None, random_state=None):
  1378. # From [1] after Equation (12): "To generate pseudo-random
  1379. # deviates from the dPlN distribution, one can exponentiate
  1380. # pseudo-random deviates from NL generated using (6)."
  1381. Z = random_state.normal(u, s, size=size)
  1382. E1 = random_state.standard_exponential(size=size)
  1383. E2 = random_state.standard_exponential(size=size)
  1384. return np.exp(Z + E1 / a - E2 / b)
  1385. def _logpdf(self, x, u, s, a, b):
  1386. with np.errstate(invalid='ignore', divide='ignore'):
  1387. log_y, m = np.log(x), u # compare against [1] Eq. 1
  1388. z = (log_y - m) / s
  1389. x1 = a * s - z
  1390. x2 = b * s + z
  1391. out = np.asarray(np.log(a) + np.log(b) - np.log(a + b) - log_y)
  1392. out += self._logphi(z)
  1393. out += np.logaddexp(self._logR(x1), self._logR(x2))
  1394. out[(x == 0) | np.isinf(x)] = -np.inf
  1395. return out[()]
  1396. def _logcdf(self, x, u, s, a, b):
  1397. with np.errstate(invalid='ignore', divide='ignore'):
  1398. log_y, m = np.log(x), u # compare against [1] Eq. 2
  1399. z = (log_y - m) / s
  1400. x1 = a * s - z
  1401. x2 = b * s + z
  1402. t1 = self._logPhi(z)
  1403. t2 = self._logphi(z)
  1404. t3 = (np.log(b) + self._logR(x1))
  1405. t4 = (np.log(a) + self._logR(x2))
  1406. t1, t2, t3, t4, one = np.broadcast_arrays(t1, t2, t3, t4, 1)
  1407. # t3 can be smaller than t4, so we have to consider log of negative number
  1408. # This would be much simpler, but `return_sign` is available, so use it?
  1409. # t5 = sc.logsumexp([t3, t4 + np.pi*1j])
  1410. t5, sign = sc.logsumexp([t3, t4], b=[one, -one], axis=0, return_sign=True)
  1411. temp = [t1, t2 + t5 - np.log(a + b)]
  1412. out = np.asarray(sc.logsumexp(temp, b=[one, -one*sign], axis=0))
  1413. out[x == 0] = -np.inf
  1414. return out[()]
  1415. def _logsf(self, x, u, s, a, b):
  1416. return scu._log1mexp(self._logcdf(x, u, s, a, b))
  1417. # Infrastructure doesn't seem to do this, so...
  1418. def _pdf(self, x, u, s, a, b):
  1419. return np.exp(self._logpdf(x, u, s, a, b))
  1420. def _cdf(self, x, u, s, a, b):
  1421. return np.exp(self._logcdf(x, u, s, a, b))
  1422. def _sf(self, x, u, s, a, b):
  1423. return np.exp(self._logsf(x, u, s, a, b))
  1424. def _munp(self, n, u, s, a, b):
  1425. m, k = u, float(n) # compare against [1] Eq. 6
  1426. out = (a * b) / ((a - k) * (b + k)) * np.exp(k * m + k ** 2 * s ** 2 / 2)
  1427. out = np.asarray(out)
  1428. out[a <= k] = np.nan
  1429. return out
  1430. dpareto_lognorm = dpareto_lognorm_gen(a=0, name='dpareto_lognorm')
  1431. class dweibull_gen(rv_continuous):
  1432. r"""A double Weibull continuous random variable.
  1433. %(before_notes)s
  1434. Notes
  1435. -----
  1436. The probability density function for `dweibull` is given by
  1437. .. math::
  1438. f(x, c) = c / 2 |x|^{c-1} \exp(-|x|^c)
  1439. for a real number :math:`x` and :math:`c > 0`.
  1440. `dweibull` takes ``c`` as a shape parameter for :math:`c`.
  1441. %(after_notes)s
  1442. %(example)s
  1443. """
  1444. def _shape_info(self):
  1445. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1446. def _rvs(self, c, size=None, random_state=None):
  1447. u = random_state.uniform(size=size)
  1448. w = weibull_min.rvs(c, size=size, random_state=random_state)
  1449. return w * (np.where(u >= 0.5, 1, -1))
  1450. def _pdf(self, x, c):
  1451. # dweibull.pdf(x, c) = c / 2 * abs(x)**(c-1) * exp(-abs(x)**c)
  1452. ax = abs(x)
  1453. Px = c / 2.0 * ax**(c-1.0) * np.exp(-ax**c)
  1454. return Px
  1455. def _logpdf(self, x, c):
  1456. ax = abs(x)
  1457. return np.log(c) - np.log(2.0) + sc.xlogy(c - 1.0, ax) - ax**c
  1458. def _cdf(self, x, c):
  1459. Cx1 = 0.5 * np.exp(-abs(x)**c)
  1460. return np.where(x > 0, 1 - Cx1, Cx1)
  1461. def _ppf(self, q, c):
  1462. fac = 2. * np.where(q <= 0.5, q, 1. - q)
  1463. fac = np.power(-np.log(fac), 1.0 / c)
  1464. return np.where(q > 0.5, fac, -fac)
  1465. def _sf(self, x, c):
  1466. half_weibull_min_sf = 0.5 * stats.weibull_min._sf(np.abs(x), c)
  1467. return np.where(x > 0, half_weibull_min_sf, 1 - half_weibull_min_sf)
  1468. def _isf(self, q, c):
  1469. double_q = 2. * np.where(q <= 0.5, q, 1. - q)
  1470. weibull_min_isf = stats.weibull_min._isf(double_q, c)
  1471. return np.where(q > 0.5, -weibull_min_isf, weibull_min_isf)
  1472. def _munp(self, n, c):
  1473. return (1 - (n % 2)) * sc.gamma(1.0 + 1.0 * n / c)
  1474. # since we know that all odd moments are zeros, return them at once.
  1475. # returning Nones from _stats makes the public stats call _munp
  1476. # so overall we're saving one or two gamma function evaluations here.
  1477. def _stats(self, c):
  1478. return 0, None, 0, None
  1479. def _entropy(self, c):
  1480. h = stats.weibull_min._entropy(c) - np.log(0.5)
  1481. return h
  1482. dweibull = dweibull_gen(name='dweibull')
  1483. class expon_gen(rv_continuous):
  1484. r"""An exponential continuous random variable.
  1485. %(before_notes)s
  1486. Notes
  1487. -----
  1488. The probability density function for `expon` is:
  1489. .. math::
  1490. f(x) = \exp(-x)
  1491. for :math:`x \ge 0`.
  1492. %(after_notes)s
  1493. A common parameterization for `expon` is in terms of the rate parameter
  1494. ``lambda``, such that ``pdf = lambda * exp(-lambda * x)``. This
  1495. parameterization corresponds to using ``scale = 1 / lambda``.
  1496. The exponential distribution is a special case of the gamma
  1497. distributions, with gamma shape parameter ``a = 1``.
  1498. %(example)s
  1499. """
  1500. def _shape_info(self):
  1501. return []
  1502. def _rvs(self, size=None, random_state=None):
  1503. return random_state.standard_exponential(size)
  1504. def _pdf(self, x):
  1505. # expon.pdf(x) = exp(-x)
  1506. return np.exp(-x)
  1507. def _logpdf(self, x):
  1508. return -x
  1509. def _cdf(self, x):
  1510. return -sc.expm1(-x)
  1511. def _ppf(self, q):
  1512. return -sc.log1p(-q)
  1513. def _sf(self, x):
  1514. return np.exp(-x)
  1515. def _logsf(self, x):
  1516. return -x
  1517. def _isf(self, q):
  1518. return -np.log(q)
  1519. def _stats(self):
  1520. return 1.0, 1.0, 2.0, 6.0
  1521. def _entropy(self):
  1522. return 1.0
  1523. @_call_super_mom
  1524. @replace_notes_in_docstring(rv_continuous, notes="""\
  1525. When `method='MLE'`,
  1526. this function uses explicit formulas for the maximum likelihood
  1527. estimation of the exponential distribution parameters, so the
  1528. `optimizer`, `loc` and `scale` keyword arguments are
  1529. ignored.\n\n""")
  1530. def fit(self, data, *args, **kwds):
  1531. if len(args) > 0:
  1532. raise TypeError("Too many arguments.")
  1533. floc = kwds.pop('floc', None)
  1534. fscale = kwds.pop('fscale', None)
  1535. _remove_optimizer_parameters(kwds)
  1536. if floc is not None and fscale is not None:
  1537. # This check is for consistency with `rv_continuous.fit`.
  1538. raise ValueError("All parameters fixed. There is nothing to "
  1539. "optimize.")
  1540. data = np.asarray(data)
  1541. if not np.isfinite(data).all():
  1542. raise ValueError("The data contains non-finite values.")
  1543. data_min = data.min()
  1544. if floc is None:
  1545. # ML estimate of the location is the minimum of the data.
  1546. loc = data_min
  1547. else:
  1548. loc = floc
  1549. if data_min < loc:
  1550. # There are values that are less than the specified loc.
  1551. raise FitDataError("expon", lower=floc, upper=np.inf)
  1552. if fscale is None:
  1553. # ML estimate of the scale is the shifted mean.
  1554. scale = data.mean() - loc
  1555. else:
  1556. scale = fscale
  1557. # We expect the return values to be floating point, so ensure it
  1558. # by explicitly converting to float.
  1559. return float(loc), float(scale)
  1560. expon = expon_gen(a=0.0, name='expon')
  1561. class exponnorm_gen(rv_continuous):
  1562. r"""An exponentially modified Normal continuous random variable.
  1563. Also known as the exponentially modified Gaussian distribution [1]_.
  1564. %(before_notes)s
  1565. Notes
  1566. -----
  1567. The probability density function for `exponnorm` is:
  1568. .. math::
  1569. f(x, K) = \frac{1}{2K} \exp\left(\frac{1}{2 K^2} - x / K \right)
  1570. \text{erfc}\left(-\frac{x - 1/K}{\sqrt{2}}\right)
  1571. where :math:`x` is a real number and :math:`K > 0`.
  1572. It can be thought of as the sum of a standard normal random variable
  1573. and an independent exponentially distributed random variable with rate
  1574. ``1/K``.
  1575. %(after_notes)s
  1576. An alternative parameterization of this distribution (for example, in
  1577. the Wikipedia article [1]_) involves three parameters, :math:`\mu`,
  1578. :math:`\lambda` and :math:`\sigma`.
  1579. In the present parameterization this corresponds to having ``loc`` and
  1580. ``scale`` equal to :math:`\mu` and :math:`\sigma`, respectively, and
  1581. shape parameter :math:`K = 1/(\sigma\lambda)`.
  1582. .. versionadded:: 0.16.0
  1583. References
  1584. ----------
  1585. .. [1] Exponentially modified Gaussian distribution, Wikipedia,
  1586. https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution
  1587. %(example)s
  1588. """
  1589. def _shape_info(self):
  1590. return [_ShapeInfo("K", False, (0, np.inf), (False, False))]
  1591. def _rvs(self, K, size=None, random_state=None):
  1592. expval = random_state.standard_exponential(size) * K
  1593. gval = random_state.standard_normal(size)
  1594. return expval + gval
  1595. def _pdf(self, x, K):
  1596. return np.exp(self._logpdf(x, K))
  1597. def _logpdf(self, x, K):
  1598. invK = 1.0 / K
  1599. exparg = invK * (0.5 * invK - x)
  1600. return exparg + _norm_logcdf(x - invK) - np.log(K)
  1601. def _cdf(self, x, K):
  1602. invK = 1.0 / K
  1603. expval = invK * (0.5 * invK - x)
  1604. logprod = expval + _norm_logcdf(x - invK)
  1605. return _norm_cdf(x) - np.exp(logprod)
  1606. def _sf(self, x, K):
  1607. invK = 1.0 / K
  1608. expval = invK * (0.5 * invK - x)
  1609. logprod = expval + _norm_logcdf(x - invK)
  1610. return _norm_cdf(-x) + np.exp(logprod)
  1611. def _stats(self, K):
  1612. K2 = K * K
  1613. opK2 = 1.0 + K2
  1614. skw = 2 * K**3 * opK2**(-1.5)
  1615. krt = 6.0 * K2 * K2 * opK2**(-2)
  1616. return K, opK2, skw, krt
  1617. exponnorm = exponnorm_gen(name='exponnorm')
  1618. def _pow1pm1(x, y):
  1619. """
  1620. Compute (1 + x)**y - 1.
  1621. Uses expm1 and xlog1py to avoid loss of precision when
  1622. (1 + x)**y is close to 1.
  1623. Note that the inverse of this function with respect to x is
  1624. ``_pow1pm1(x, 1/y)``. That is, if
  1625. t = _pow1pm1(x, y)
  1626. then
  1627. x = _pow1pm1(t, 1/y)
  1628. """
  1629. return np.expm1(sc.xlog1py(y, x))
  1630. class exponweib_gen(rv_continuous):
  1631. r"""An exponentiated Weibull continuous random variable.
  1632. %(before_notes)s
  1633. See Also
  1634. --------
  1635. weibull_min, numpy.random.Generator.weibull
  1636. Notes
  1637. -----
  1638. The probability density function for `exponweib` is:
  1639. .. math::
  1640. f(x, a, c) = a c [1-\exp(-x^c)]^{a-1} \exp(-x^c) x^{c-1}
  1641. and its cumulative distribution function is:
  1642. .. math::
  1643. F(x, a, c) = [1-\exp(-x^c)]^a
  1644. for :math:`x > 0`, :math:`a > 0`, :math:`c > 0`.
  1645. `exponweib` takes :math:`a` and :math:`c` as shape parameters:
  1646. * :math:`a` is the exponentiation parameter,
  1647. with the special case :math:`a=1` corresponding to the
  1648. (non-exponentiated) Weibull distribution `weibull_min`.
  1649. * :math:`c` is the shape parameter of the non-exponentiated Weibull law.
  1650. %(after_notes)s
  1651. References
  1652. ----------
  1653. https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution
  1654. %(example)s
  1655. """
  1656. def _shape_info(self):
  1657. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  1658. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  1659. return [ia, ic]
  1660. def _pdf(self, x, a, c):
  1661. # exponweib.pdf(x, a, c) =
  1662. # a * c * (1-exp(-x**c))**(a-1) * exp(-x**c)*x**(c-1)
  1663. return np.exp(self._logpdf(x, a, c))
  1664. def _logpdf(self, x, a, c):
  1665. negxc = -x**c
  1666. exm1c = -sc.expm1(negxc)
  1667. logp = (np.log(a) + np.log(c) + sc.xlogy(a - 1.0, exm1c) +
  1668. negxc + sc.xlogy(c - 1.0, x))
  1669. return logp
  1670. def _cdf(self, x, a, c):
  1671. exm1c = -sc.expm1(-x**c)
  1672. return exm1c**a
  1673. def _ppf(self, q, a, c):
  1674. return (-sc.log1p(-q**(1.0/a)))**np.asarray(1.0/c)
  1675. def _sf(self, x, a, c):
  1676. return -_pow1pm1(-np.exp(-x**c), a)
  1677. def _isf(self, p, a, c):
  1678. return (-np.log(-_pow1pm1(-p, 1/a)))**(1/c)
  1679. exponweib = exponweib_gen(a=0.0, name='exponweib')
  1680. class exponpow_gen(rv_continuous):
  1681. r"""An exponential power continuous random variable.
  1682. %(before_notes)s
  1683. Notes
  1684. -----
  1685. The probability density function for `exponpow` is:
  1686. .. math::
  1687. f(x, b) = b x^{b-1} \exp(1 + x^b - \exp(x^b))
  1688. for :math:`x \ge 0`, :math:`b > 0`. Note that this is a different
  1689. distribution from the exponential power distribution that is also known
  1690. under the names "generalized normal" or "generalized Gaussian".
  1691. `exponpow` takes ``b`` as a shape parameter for :math:`b`.
  1692. %(after_notes)s
  1693. References
  1694. ----------
  1695. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Exponentialpower.pdf
  1696. %(example)s
  1697. """
  1698. def _shape_info(self):
  1699. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  1700. def _pdf(self, x, b):
  1701. # exponpow.pdf(x, b) = b * x**(b-1) * exp(1 + x**b - exp(x**b))
  1702. return np.exp(self._logpdf(x, b))
  1703. def _logpdf(self, x, b):
  1704. xb = x**b
  1705. f = 1 + np.log(b) + sc.xlogy(b - 1.0, x) + xb - np.exp(xb)
  1706. return f
  1707. def _cdf(self, x, b):
  1708. return -sc.expm1(-sc.expm1(x**b))
  1709. def _sf(self, x, b):
  1710. return np.exp(-sc.expm1(x**b))
  1711. def _isf(self, x, b):
  1712. return (sc.log1p(-np.log(x)))**(1./b)
  1713. def _ppf(self, q, b):
  1714. return pow(sc.log1p(-sc.log1p(-q)), 1.0/b)
  1715. exponpow = exponpow_gen(a=0.0, name='exponpow')
  1716. class fatiguelife_gen(rv_continuous):
  1717. r"""A fatigue-life (Birnbaum-Saunders) continuous random variable.
  1718. %(before_notes)s
  1719. Notes
  1720. -----
  1721. The probability density function for `fatiguelife` is:
  1722. .. math::
  1723. f(x, c) = \frac{x+1}{2c\sqrt{2\pi x^3}} \exp(-\frac{(x-1)^2}{2x c^2})
  1724. for :math:`x >= 0` and :math:`c > 0`.
  1725. `fatiguelife` takes ``c`` as a shape parameter for :math:`c`.
  1726. %(after_notes)s
  1727. References
  1728. ----------
  1729. .. [1] "Birnbaum-Saunders distribution",
  1730. https://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution
  1731. %(example)s
  1732. """
  1733. _support_mask = rv_continuous._open_support_mask
  1734. def _shape_info(self):
  1735. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1736. def _rvs(self, c, size=None, random_state=None):
  1737. z = random_state.standard_normal(size)
  1738. x = 0.5*c*z
  1739. x2 = x*x
  1740. t = 1.0 + 2*x2 + 2*x*np.sqrt(1 + x2)
  1741. return t
  1742. def _pdf(self, x, c):
  1743. # fatiguelife.pdf(x, c) =
  1744. # (x+1) / (2*c*sqrt(2*pi*x**3)) * exp(-(x-1)**2/(2*x*c**2))
  1745. return np.exp(self._logpdf(x, c))
  1746. def _logpdf(self, x, c):
  1747. return (np.log(x+1) - (x-1)**2 / (2.0*x*c**2) - np.log(2*c) -
  1748. 0.5*(np.log(2*np.pi) + 3*np.log(x)))
  1749. def _cdf(self, x, c):
  1750. return _norm_cdf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1751. def _ppf(self, q, c):
  1752. tmp = c * _norm_ppf(q)
  1753. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1754. def _sf(self, x, c):
  1755. return _norm_sf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1756. def _isf(self, q, c):
  1757. tmp = -c * _norm_ppf(q)
  1758. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1759. def _stats(self, c):
  1760. # NB: the formula for kurtosis in wikipedia seems to have an error:
  1761. # it's 40, not 41. At least it disagrees with the one from Wolfram
  1762. # Alpha. And the latter one, below, passes the tests, while the wiki
  1763. # one doesn't So far I didn't have the guts to actually check the
  1764. # coefficients from the expressions for the raw moments.
  1765. c2 = c*c
  1766. mu = c2 / 2.0 + 1.0
  1767. den = 5.0 * c2 + 4.0
  1768. mu2 = c2*den / 4.0
  1769. g1 = 4 * c * (11*c2 + 6.0) / np.power(den, 1.5)
  1770. g2 = 6 * c2 * (93*c2 + 40.0) / den**2.0
  1771. return mu, mu2, g1, g2
  1772. fatiguelife = fatiguelife_gen(a=0.0, name='fatiguelife')
  1773. class foldcauchy_gen(rv_continuous):
  1774. r"""A folded Cauchy continuous random variable.
  1775. %(before_notes)s
  1776. Notes
  1777. -----
  1778. The probability density function for `foldcauchy` is:
  1779. .. math::
  1780. f(x, c) = \frac{1}{\pi (1+(x-c)^2)} + \frac{1}{\pi (1+(x+c)^2)}
  1781. for :math:`x \ge 0` and :math:`c \ge 0`.
  1782. `foldcauchy` takes ``c`` as a shape parameter for :math:`c`.
  1783. %(example)s
  1784. """
  1785. def _argcheck(self, c):
  1786. return c >= 0
  1787. def _shape_info(self):
  1788. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1789. def _rvs(self, c, size=None, random_state=None):
  1790. return abs(cauchy.rvs(loc=c, size=size,
  1791. random_state=random_state))
  1792. def _pdf(self, x, c):
  1793. # foldcauchy.pdf(x, c) = 1/(pi*(1+(x-c)**2)) + 1/(pi*(1+(x+c)**2))
  1794. return 1.0/np.pi*(1.0/(1+(x-c)**2) + 1.0/(1+(x+c)**2))
  1795. def _cdf(self, x, c):
  1796. return 1.0/np.pi*(np.arctan(x-c) + np.arctan(x+c))
  1797. def _sf(self, x, c):
  1798. # 1 - CDF(x, c) = 1 - (atan(x - c) + atan(x + c))/pi
  1799. # = ((pi/2 - atan(x - c)) + (pi/2 - atan(x + c)))/pi
  1800. # = (acot(x - c) + acot(x + c))/pi
  1801. # = (atan2(1, x - c) + atan2(1, x + c))/pi
  1802. return (np.arctan2(1, x - c) + np.arctan2(1, x + c))/np.pi
  1803. def _stats(self, c):
  1804. return np.inf, np.inf, np.nan, np.nan
  1805. foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy')
  1806. class f_gen(rv_continuous):
  1807. r"""An F continuous random variable.
  1808. For the noncentral F distribution, see `ncf`.
  1809. %(before_notes)s
  1810. See Also
  1811. --------
  1812. ncf
  1813. Notes
  1814. -----
  1815. The F distribution with :math:`df_1 > 0` and :math:`df_2 > 0` degrees of freedom is
  1816. the distribution of the ratio of two independent chi-squared distributions with
  1817. :math:`df_1` and :math:`df_2` degrees of freedom, after rescaling by
  1818. :math:`df_2 / df_1`.
  1819. The probability density function for `f` is:
  1820. .. math::
  1821. f(x, df_1, df_2) = \frac{df_2^{df_2/2} df_1^{df_1/2} x^{df_1 / 2-1}}
  1822. {(df_2+df_1 x)^{(df_1+df_2)/2}
  1823. B(df_1/2, df_2/2)}
  1824. for :math:`x > 0`.
  1825. `f` accepts shape parameters ``dfn`` and ``dfd`` for :math:`df_1`, the degrees of
  1826. freedom of the chi-squared distribution in the numerator, and :math:`df_2`, the
  1827. degrees of freedom of the chi-squared distribution in the denominator, respectively.
  1828. %(after_notes)s
  1829. %(example)s
  1830. """
  1831. def _shape_info(self):
  1832. idfn = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
  1833. idfd = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
  1834. return [idfn, idfd]
  1835. def _rvs(self, dfn, dfd, size=None, random_state=None):
  1836. return random_state.f(dfn, dfd, size)
  1837. def _pdf(self, x, dfn, dfd):
  1838. # df2**(df2/2) * df1**(df1/2) * x**(df1/2-1)
  1839. # F.pdf(x, df1, df2) = --------------------------------------------
  1840. # (df2+df1*x)**((df1+df2)/2) * B(df1/2, df2/2)
  1841. return np.exp(self._logpdf(x, dfn, dfd))
  1842. def _logpdf(self, x, dfn, dfd):
  1843. n = 1.0 * dfn
  1844. m = 1.0 * dfd
  1845. lPx = (m/2 * np.log(m) + n/2 * np.log(n) + sc.xlogy(n/2 - 1, x)
  1846. - (((n+m)/2) * np.log(m + n*x) + sc.betaln(n/2, m/2)))
  1847. return lPx
  1848. def _cdf(self, x, dfn, dfd):
  1849. return sc.fdtr(dfn, dfd, x)
  1850. def _sf(self, x, dfn, dfd):
  1851. return sc.fdtrc(dfn, dfd, x)
  1852. def _ppf(self, q, dfn, dfd):
  1853. return sc.fdtri(dfn, dfd, q)
  1854. def _stats(self, dfn, dfd):
  1855. v1, v2 = 1. * dfn, 1. * dfd
  1856. v2_2, v2_4, v2_6, v2_8 = v2 - 2., v2 - 4., v2 - 6., v2 - 8.
  1857. mu = xpx.apply_where(
  1858. v2 > 2, (v2, v2_2),
  1859. lambda v2, v2_2: v2 / v2_2,
  1860. fill_value=np.inf)
  1861. mu2 = xpx.apply_where(
  1862. v2 > 4, (v1, v2, v2_2, v2_4),
  1863. lambda v1, v2, v2_2, v2_4:
  1864. 2 * v2 * v2 * (v1 + v2_2) / (v1 * v2_2**2 * v2_4),
  1865. fill_value=np.inf)
  1866. g1 = xpx.apply_where(
  1867. v2 > 6, (v1, v2_2, v2_4, v2_6),
  1868. lambda v1, v2_2, v2_4, v2_6:
  1869. (2 * v1 + v2_2) / v2_6 * np.sqrt(v2_4 / (v1 * (v1 + v2_2))),
  1870. fill_value=np.nan)
  1871. g1 *= np.sqrt(8.)
  1872. g2 = xpx.apply_where(
  1873. v2 > 8, (g1, v2_6, v2_8),
  1874. lambda g1, v2_6, v2_8: (8 + g1 * g1 * v2_6) / v2_8,
  1875. fill_value=np.nan)
  1876. g2 *= 3. / 2.
  1877. return mu, mu2, g1, g2
  1878. def _entropy(self, dfn, dfd):
  1879. # the formula found in literature is incorrect. This one yields the
  1880. # same result as numerical integration using the generic entropy
  1881. # definition. This is also tested in tests/test_conntinous_basic
  1882. half_dfn = 0.5 * dfn
  1883. half_dfd = 0.5 * dfd
  1884. half_sum = 0.5 * (dfn + dfd)
  1885. return (np.log(dfd) - np.log(dfn) + sc.betaln(half_dfn, half_dfd) +
  1886. (1 - half_dfn) * sc.psi(half_dfn) - (1 + half_dfd) *
  1887. sc.psi(half_dfd) + half_sum * sc.psi(half_sum))
  1888. f = f_gen(a=0.0, name='f')
  1889. ## Folded Normal
  1890. ## abs(Z) where (Z is normal with mu=L and std=S so that c=abs(L)/S)
  1891. ##
  1892. ## note: regress docs have scale parameter correct, but first parameter
  1893. ## he gives is a shape parameter A = c * scale
  1894. ## Half-normal is folded normal with shape-parameter c=0.
  1895. class foldnorm_gen(rv_continuous):
  1896. r"""A folded normal continuous random variable.
  1897. %(before_notes)s
  1898. Notes
  1899. -----
  1900. The probability density function for `foldnorm` is:
  1901. .. math::
  1902. f(x, c) = \sqrt{2/\pi} cosh(c x) \exp(-\frac{x^2+c^2}{2})
  1903. for :math:`x \ge 0` and :math:`c \ge 0`.
  1904. `foldnorm` takes ``c`` as a shape parameter for :math:`c`.
  1905. %(after_notes)s
  1906. %(example)s
  1907. """
  1908. def _argcheck(self, c):
  1909. return c >= 0
  1910. def _shape_info(self):
  1911. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1912. def _rvs(self, c, size=None, random_state=None):
  1913. return abs(random_state.standard_normal(size) + c)
  1914. def _pdf(self, x, c):
  1915. # foldnormal.pdf(x, c) = sqrt(2/pi) * cosh(c*x) * exp(-(x**2+c**2)/2)
  1916. return _norm_pdf(x + c) + _norm_pdf(x-c)
  1917. def _cdf(self, x, c):
  1918. sqrt_two = np.sqrt(2)
  1919. return 0.5 * (sc.erf((x - c)/sqrt_two) + sc.erf((x + c)/sqrt_two))
  1920. def _sf(self, x, c):
  1921. return _norm_sf(x - c) + _norm_sf(x + c)
  1922. def _stats(self, c):
  1923. # Regina C. Elandt, Technometrics 3, 551 (1961)
  1924. # https://www.jstor.org/stable/1266561
  1925. #
  1926. c2 = c*c
  1927. expfac = np.exp(-0.5*c2) / np.sqrt(2.*np.pi)
  1928. mu = 2.*expfac + c * sc.erf(c/np.sqrt(2))
  1929. mu2 = c2 + 1 - mu*mu
  1930. g1 = 2. * (mu*mu*mu - c2*mu - expfac)
  1931. g1 /= np.power(mu2, 1.5)
  1932. g2 = c2 * (c2 + 6.) + 3 + 8.*expfac*mu
  1933. g2 += (2. * (c2 - 3.) - 3. * mu**2) * mu**2
  1934. g2 = g2 / mu2**2.0 - 3.
  1935. return mu, mu2, g1, g2
  1936. foldnorm = foldnorm_gen(a=0.0, name='foldnorm')
class weibull_min_gen(rv_continuous):
    r"""Weibull minimum continuous random variable.

    The Weibull Minimum Extreme Value distribution, from extreme value theory
    (Fisher-Gnedenko theorem), is also often simply called the Weibull
    distribution. It arises as the limiting distribution of the rescaled
    minimum of iid random variables.

    %(before_notes)s

    See Also
    --------
    weibull_max, numpy.random.Generator.weibull, exponweib

    Notes
    -----
    The probability density function for `weibull_min` is:

    .. math::

        f(x, c) = c x^{c-1} \exp(-x^c)

    for :math:`x > 0`, :math:`c > 0`.

    `weibull_min` takes ``c`` as a shape parameter for :math:`c`.
    (named :math:`k` in Wikipedia article and :math:`a` in
    ``numpy.random.weibull``).  Special shape values are :math:`c=1` and
    :math:`c=2` where Weibull distribution reduces to the `expon` and
    `rayleigh` distributions respectively.

    Suppose ``X`` is an exponentially distributed random variable with
    scale ``s``. Then ``Y = X**k`` is `weibull_min` distributed with shape
    ``c = 1/k`` and scale ``s**k``.

    %(after_notes)s

    References
    ----------
    https://en.wikipedia.org/wiki/Weibull_distribution

    https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem

    %(example)s

    """
    def _shape_info(self):
        # Single shape ``c`` on the open interval (0, inf).
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    def _pdf(self, x, c):
        # weibull_min.pdf(x, c) = c * x**(c-1) * exp(-x**c)
        return c*pow(x, c-1)*np.exp(-pow(x, c))

    def _logpdf(self, x, c):
        # log(c) + (c-1)*log(x) - x**c, with the middle term computed by xlogy.
        return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c)

    def _cdf(self, x, c):
        # 1 - exp(-x**c), written with expm1.
        return -sc.expm1(-pow(x, c))

    def _ppf(self, q, c):
        # Inverse of `_cdf`: (-log(1 - q))**(1/c), written with log1p.
        return pow(-sc.log1p(-q), 1.0/c)

    def _sf(self, x, c):
        # The survival function is the exponential of `_logsf`.
        return np.exp(self._logsf(x, c))

    def _logsf(self, x, c):
        # log(sf) = -x**c
        return -pow(x, c)

    def _isf(self, q, c):
        # Inverse of `_sf`: (-log(q))**(1/c)
        return (-np.log(q))**(1/c)

    def _munp(self, n, c):
        # n-th raw moment: Gamma(1 + n/c)
        return sc.gamma(1.0+n*1.0/c)

    def _entropy(self, c):
        return -_EULER / c - np.log(c) + _EULER + 1

    @extend_notes_in_docstring(rv_continuous, notes="""\
        If ``method='mm'``, parameters fixed by the user are respected, and the
        remaining parameters are used to match distribution and sample moments
        where possible. For example, if the user fixes the location with
        ``floc``, the parameters will only match the distribution skewness and
        variance to the sample skewness and variance; no attempt will be made
        to match the means or minimize a norm of the errors.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # Overview: unless the caller fixed or guessed a parameter, estimate
        # it by matching moments (skewness -> c, variance -> scale,
        # mean -> loc). For ``method='mm'`` those estimates are returned
        # directly; otherwise they seed the generic fit of the superclass.

        # Censored data: when nothing is actually censored, convert to plain
        # data and continue; otherwise defer entirely to the generic fit.
        if isinstance(data, CensoredData):
            if data.num_censored() == 0:
                data = data._uncensor()
            else:
                return super().fit(data, *args, **kwds)

        # A truthy ``superfit`` keyword (removed from `kwds` here) bypasses
        # everything below and uses the generic fit.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        # this extracts fixed shape, location, and scale however they
        # are specified, and also leaves them in `kwds`
        data, fc, floc, fscale = _check_fit_input_parameters(self, data,
                                                             args, kwds)
        method = kwds.get("method", "mle").lower()

        # See https://en.wikipedia.org/wiki/Weibull_distribution#Moments for
        # moment formulas.
        def skew(c):
            # Skewness as a function of the shape ``c`` only, built from
            # Gamma(1 + k/c) for k = 1, 2, 3.
            gamma1 = sc.gamma(1+1/c)
            gamma2 = sc.gamma(1+2/c)
            gamma3 = sc.gamma(1+3/c)
            num = 2 * gamma1**3 - 3*gamma1*gamma2 + gamma3
            den = (gamma2 - gamma1**2)**(3/2)
            return num/den

        # For c in [1e2, 3e4], population skewness appears to approach
        # asymptote near -1.139, but past c > 3e4, skewness begins to vary
        # wildly, and MoM won't provide a good guess. Get out early.
        s = stats.skew(data)
        max_c = 1e4
        s_min = skew(max_c)
        # Sample skewness below the value at ``max_c`` cannot be matched
        # inside the bracket used below, so fall back to the generic fit
        # (only when not doing MoM and no shape was fixed or guessed).
        if s < s_min and method != "mm" and fc is None and not args:
            return super().fit(data, *args, **kwds)

        # If method is method of moments, we don't need the user's guesses.
        # Otherwise, extract the guesses from args and kwds.
        if method == "mm":
            c, loc, scale = None, None, None
        else:
            c = args[0] if len(args) else None
            loc = kwds.pop('loc', None)
            scale = kwds.pop('scale', None)

        if fc is None and c is None:  # not fixed and no guess: use MoM
            # Solve for c that matches sample distribution skewness to sample
            # skewness.
            # we start having numerical issues with `weibull_min` with
            # parameters outside this range - and not just in this method.
            # We could probably improve the situation by doing everything
            # in the log space, but that is for another time.
            c = root_scalar(lambda c: skew(c) - s, bracket=[0.02, max_c],
                            method='bisect').root
        elif fc is not None:  # fixed: use it
            c = fc

        # Scale: match the sample variance unless fixed or guessed.
        if fscale is None and scale is None:
            v = np.var(data)
            scale = np.sqrt(v / (sc.gamma(1+2/c) - sc.gamma(1+1/c)**2))
        elif fscale is not None:
            scale = fscale

        # Location: match the sample mean unless fixed or guessed.
        if floc is None and loc is None:
            m = np.mean(data)
            loc = m - scale*sc.gamma(1 + 1/c)
        elif floc is not None:
            loc = floc

        if method == 'mm':
            return c, loc, scale
        else:
            # At this point, parameter "guesses" may equal the fixed parameters
            # in kwds. No harm in passing them as guesses, too.
            return super().fit(data, c, loc=loc, scale=scale, **kwds)


weibull_min = weibull_min_gen(a=0.0, name='weibull_min')
  2063. class truncweibull_min_gen(rv_continuous):
  2064. r"""A doubly truncated Weibull minimum continuous random variable.
  2065. %(before_notes)s
  2066. See Also
  2067. --------
  2068. weibull_min, truncexpon
  2069. Notes
  2070. -----
  2071. The probability density function for `truncweibull_min` is:
  2072. .. math::
  2073. f(x, a, b, c) = \frac{c x^{c-1} \exp(-x^c)}{\exp(-a^c) - \exp(-b^c)}
  2074. for :math:`a < x <= b`, :math:`0 \le a < b` and :math:`c > 0`.
  2075. `truncweibull_min` takes :math:`a`, :math:`b`, and :math:`c` as shape
  2076. parameters.
  2077. Notice that the truncation values, :math:`a` and :math:`b`, are defined in
  2078. standardized form:
  2079. .. math::
  2080. a = (u_l - loc)/scale
  2081. b = (u_r - loc)/scale
  2082. where :math:`u_l` and :math:`u_r` are the specific left and right
  2083. truncation values, respectively. In other words, the support of the
  2084. distribution becomes :math:`(a*scale + loc) < x <= (b*scale + loc)` when
  2085. :math:`loc` and/or :math:`scale` are provided.
  2086. %(after_notes)s
  2087. References
  2088. ----------
  2089. .. [1] Rinne, H. "The Weibull Distribution: A Handbook". CRC Press (2009).
  2090. %(example)s
  2091. """
  2092. def _argcheck(self, c, a, b):
  2093. return (a >= 0.) & (b > a) & (c > 0.)
  2094. def _shape_info(self):
  2095. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  2096. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  2097. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  2098. return [ic, ia, ib]
  2099. def _fitstart(self, data):
  2100. # Arbitrary, but default a=b=c=1 is not valid
  2101. return super()._fitstart(data, args=(1, 0, 1))
  2102. def _get_support(self, c, a, b):
  2103. return a, b
  2104. def _pdf(self, x, c, a, b):
  2105. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2106. return (c * pow(x, c-1) * np.exp(-pow(x, c))) / denum
  2107. def _logpdf(self, x, c, a, b):
  2108. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2109. return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c) - logdenum
  2110. def _cdf(self, x, c, a, b):
  2111. num = (np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  2112. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2113. return num / denum
  2114. def _logcdf(self, x, c, a, b):
  2115. lognum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  2116. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2117. return lognum - logdenum
  2118. def _sf(self, x, c, a, b):
  2119. num = (np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  2120. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2121. return num / denum
  2122. def _logsf(self, x, c, a, b):
  2123. lognum = np.log(np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  2124. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2125. return lognum - logdenum
  2126. def _isf(self, q, c, a, b):
  2127. return pow(
  2128. -np.log((1 - q) * np.exp(-pow(b, c)) + q * np.exp(-pow(a, c))), 1/c
  2129. )
  2130. def _ppf(self, q, c, a, b):
  2131. return pow(
  2132. -np.log((1 - q) * np.exp(-pow(a, c)) + q * np.exp(-pow(b, c))), 1/c
  2133. )
  2134. def _munp(self, n, c, a, b):
  2135. gamma_fun = sc.gamma(n/c + 1.) * (
  2136. sc.gammainc(n/c + 1., pow(b, c)) - sc.gammainc(n/c + 1., pow(a, c))
  2137. )
  2138. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2139. return gamma_fun / denum
  2140. truncweibull_min = truncweibull_min_gen(name='truncweibull_min')
  2141. truncweibull_min._support = ('a', 'b')
  2142. class weibull_max_gen(rv_continuous):
  2143. r"""Weibull maximum continuous random variable.
  2144. The Weibull Maximum Extreme Value distribution, from extreme value theory
  2145. (Fisher-Gnedenko theorem), is the limiting distribution of rescaled
  2146. maximum of iid random variables. This is the distribution of -X
  2147. if X is from the `weibull_min` function.
  2148. %(before_notes)s
  2149. See Also
  2150. --------
  2151. weibull_min
  2152. Notes
  2153. -----
  2154. The probability density function for `weibull_max` is:
  2155. .. math::
  2156. f(x, c) = c (-x)^{c-1} \exp(-(-x)^c)
  2157. for :math:`x < 0`, :math:`c > 0`.
  2158. `weibull_max` takes ``c`` as a shape parameter for :math:`c`.
  2159. %(after_notes)s
  2160. References
  2161. ----------
  2162. https://en.wikipedia.org/wiki/Weibull_distribution
  2163. https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
  2164. %(example)s
  2165. """
  2166. def _shape_info(self):
  2167. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2168. def _pdf(self, x, c):
  2169. # weibull_max.pdf(x, c) = c * (-x)**(c-1) * exp(-(-x)**c)
  2170. return c*pow(-x, c-1)*np.exp(-pow(-x, c))
  2171. def _logpdf(self, x, c):
  2172. return np.log(c) + sc.xlogy(c-1, -x) - pow(-x, c)
  2173. def _cdf(self, x, c):
  2174. return np.exp(-pow(-x, c))
  2175. def _logcdf(self, x, c):
  2176. return -pow(-x, c)
  2177. def _sf(self, x, c):
  2178. return -sc.expm1(-pow(-x, c))
  2179. def _ppf(self, q, c):
  2180. return -pow(-np.log(q), 1.0/c)
  2181. def _munp(self, n, c):
  2182. val = sc.gamma(1.0+n*1.0/c)
  2183. if int(n) % 2:
  2184. sgn = -1
  2185. else:
  2186. sgn = 1
  2187. return sgn * val
  2188. def _entropy(self, c):
  2189. return -_EULER / c - np.log(c) + _EULER + 1
  2190. weibull_max = weibull_max_gen(b=0.0, name='weibull_max')
  2191. class genlogistic_gen(rv_continuous):
  2192. r"""A generalized logistic continuous random variable.
  2193. %(before_notes)s
  2194. Notes
  2195. -----
  2196. The probability density function for `genlogistic` is:
  2197. .. math::
  2198. f(x, c) = c \frac{\exp(-x)}
  2199. {(1 + \exp(-x))^{c+1}}
  2200. for real :math:`x` and :math:`c > 0`. In literature, different
  2201. generalizations of the logistic distribution can be found. This is the type 1
  2202. generalized logistic distribution according to [1]_. It is also referred to
  2203. as the skew-logistic distribution [2]_.
  2204. `genlogistic` takes ``c`` as a shape parameter for :math:`c`.
  2205. %(after_notes)s
  2206. References
  2207. ----------
  2208. .. [1] Johnson et al. "Continuous Univariate Distributions", Volume 2,
  2209. Wiley. 1995.
  2210. .. [2] "Generalized Logistic Distribution", Wikipedia,
  2211. https://en.wikipedia.org/wiki/Generalized_logistic_distribution
  2212. %(example)s
  2213. """
  2214. def _shape_info(self):
  2215. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2216. def _pdf(self, x, c):
  2217. # genlogistic.pdf(x, c) = c * exp(-x) / (1 + exp(-x))**(c+1)
  2218. return np.exp(self._logpdf(x, c))
  2219. def _logpdf(self, x, c):
  2220. # Two mathematically equivalent expressions for log(pdf(x, c)):
  2221. # log(pdf(x, c)) = log(c) - x - (c + 1)*log(1 + exp(-x))
  2222. # = log(c) + c*x - (c + 1)*log(1 + exp(x))
  2223. mult = -(c - 1) * (x < 0) - 1
  2224. absx = np.abs(x)
  2225. return np.log(c) + mult*absx - (c+1) * sc.log1p(np.exp(-absx))
  2226. def _cdf(self, x, c):
  2227. Cx = (1+np.exp(-x))**(-c)
  2228. return Cx
  2229. def _logcdf(self, x, c):
  2230. return -c * np.log1p(np.exp(-x))
  2231. def _ppf(self, q, c):
  2232. return -np.log(sc.powm1(q, -1.0/c))
  2233. def _sf(self, x, c):
  2234. return -sc.expm1(self._logcdf(x, c))
  2235. def _isf(self, q, c):
  2236. return self._ppf(1 - q, c)
  2237. def _stats(self, c):
  2238. mu = _EULER + sc.psi(c)
  2239. mu2 = np.pi*np.pi/6.0 + sc.zeta(2, c)
  2240. g1 = -2*sc.zeta(3, c) + 2*_ZETA3
  2241. g1 /= np.power(mu2, 1.5)
  2242. g2 = np.pi**4/15.0 + 6*sc.zeta(4, c)
  2243. g2 /= mu2**2.0
  2244. return mu, mu2, g1, g2
  2245. def _entropy(self, c):
  2246. return xpx.apply_where(
  2247. c < 8e6, c,
  2248. lambda c: -np.log(c) + sc.psi(c + 1) + _EULER + 1,
  2249. # asymptotic expansion: psi(c) ~ log(c) - 1 / (2 * c)
  2250. # a = -log(c) + psi(c + 1)
  2251. # = -log(c) + psi(c) + 1 / c
  2252. # ~ -log(c) + log(c) - 1 / (2 * c) + 1 / c
  2253. # = 1 / (2 * c)
  2254. lambda c: 1 / (2 * c) + _EULER + 1)
  2255. genlogistic = genlogistic_gen(name='genlogistic')
  2256. class genpareto_gen(rv_continuous):
  2257. r"""A generalized Pareto continuous random variable.
  2258. %(before_notes)s
  2259. Notes
  2260. -----
  2261. The probability density function for `genpareto` is:
  2262. .. math::
  2263. f(x, c) = (1 + c x)^{-1 - 1/c}
  2264. defined for :math:`x \ge 0` if :math:`c \ge 0`, and for
  2265. :math:`0 \le x \le -1/c` if :math:`c < 0`.
  2266. `genpareto` takes ``c`` as a shape parameter for :math:`c`.
  2267. For :math:`c=0`, `genpareto` reduces to the exponential
  2268. distribution, `expon`:
  2269. .. math::
  2270. f(x, 0) = \exp(-x)
  2271. For :math:`c=-1`, `genpareto` is uniform on ``[0, 1]``:
  2272. .. math::
  2273. f(x, -1) = 1
  2274. %(after_notes)s
  2275. %(example)s
  2276. """
  2277. def _argcheck(self, c):
  2278. return np.isfinite(c)
  2279. def _shape_info(self):
  2280. return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
  2281. def _get_support(self, c):
  2282. c = np.asarray(c)
  2283. a = np.broadcast_arrays(self.a, c)[0].copy()
  2284. b = xpx.apply_where(c < 0, c, lambda c: -1. / c,
  2285. fill_value=np.inf)
  2286. return a, b
  2287. def _pdf(self, x, c):
  2288. # genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
  2289. return np.exp(self._logpdf(x, c))
  2290. def _logpdf(self, x, c):
  2291. return xpx.apply_where((x == x) & (c != 0), (x, c),
  2292. lambda x, c: -sc.xlog1py(c + 1., c*x) / c,
  2293. fill_value=-x)
  2294. def _cdf(self, x, c):
  2295. return -sc.inv_boxcox1p(-x, -c)
  2296. def _sf(self, x, c):
  2297. return sc.inv_boxcox(-x, -c)
  2298. def _logsf(self, x, c):
  2299. return xpx.apply_where((x == x) & (c != 0), (x, c),
  2300. lambda x, c: -sc.log1p(c*x) / c,
  2301. fill_value=-x)
  2302. def _ppf(self, q, c):
  2303. return -sc.boxcox1p(-q, -c)
  2304. def _isf(self, q, c):
  2305. return -sc.boxcox(q, -c)
  2306. def _stats(self, c, moments='mv'):
  2307. m, v, s, k = None, None, None, None
  2308. if 'm' in moments:
  2309. m = xpx.apply_where(c < 1, c,
  2310. lambda xi: 1 / (1 - xi),
  2311. fill_value=np.inf)
  2312. if 'v' in moments:
  2313. v = xpx.apply_where(c < 1/2, c,
  2314. lambda xi: 1 / (1 - xi)**2 / (1 - 2 * xi),
  2315. fill_value=np.nan)
  2316. if 's' in moments:
  2317. s = xpx.apply_where(
  2318. c < 1/3, c,
  2319. lambda xi: 2 * (1 + xi) * np.sqrt(1 - 2*xi) / (1 - 3*xi),
  2320. fill_value=np.nan)
  2321. if 'k' in moments:
  2322. k = xpx.apply_where(
  2323. c < 1/4, c,
  2324. lambda xi: 3 * (1 - 2*xi) * (2*xi**2 + xi + 3)
  2325. / (1 - 3*xi) / (1 - 4*xi) - 3,
  2326. fill_value=np.nan)
  2327. return m, v, s, k
  2328. def _munp(self, n, c):
  2329. def __munp(c):
  2330. val = 0.0
  2331. k = np.arange(0, n + 1)
  2332. for ki, cnk in zip(k, sc.comb(n, k)):
  2333. val = val + cnk * (-1) ** ki / (1.0 - c * ki)
  2334. return np.where(c * n < 1, val * (-1.0 / c) ** n, np.inf)
  2335. return xpx.apply_where(c != 0, c, __munp, fill_value=sc.gamma(n + 1))
  2336. def _entropy(self, c):
  2337. return 1. + c
  2338. genpareto = genpareto_gen(a=0.0, name='genpareto')
  2339. class genexpon_gen(rv_continuous):
  2340. r"""A generalized exponential continuous random variable.
  2341. %(before_notes)s
  2342. Notes
  2343. -----
  2344. The probability density function for `genexpon` is:
  2345. .. math::
  2346. f(x, a, b, c) = (a + b (1 - \exp(-c x)))
  2347. \exp(-a x - b x + \frac{b}{c} (1-\exp(-c x)))
  2348. for :math:`x \ge 0`, :math:`a, b, c > 0`.
  2349. `genexpon` takes :math:`a`, :math:`b` and :math:`c` as shape parameters.
  2350. %(after_notes)s
  2351. References
  2352. ----------
  2353. H.K. Ryu, "An Extension of Marshall and Olkin's Bivariate Exponential
  2354. Distribution", Journal of the American Statistical Association, 1993.
  2355. N. Balakrishnan, Asit P. Basu (editors), *The Exponential Distribution:
  2356. Theory, Methods and Applications*, Gordon and Breach, 1995.
  2357. ISBN 10: 2884491929
  2358. %(example)s
  2359. """
  2360. def _shape_info(self):
  2361. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2362. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  2363. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  2364. return [ia, ib, ic]
  2365. def _pdf(self, x, a, b, c):
  2366. # genexpon.pdf(x, a, b, c) = (a + b * (1 - exp(-c*x))) * \
  2367. # exp(-a*x - b*x + b/c * (1-exp(-c*x)))
  2368. return (a + b*(-sc.expm1(-c*x)))*np.exp((-a-b)*x +
  2369. b*(-sc.expm1(-c*x))/c)
  2370. def _logpdf(self, x, a, b, c):
  2371. return np.log(a+b*(-sc.expm1(-c*x))) + (-a-b)*x+b*(-sc.expm1(-c*x))/c
  2372. def _cdf(self, x, a, b, c):
  2373. return -sc.expm1((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  2374. def _ppf(self, p, a, b, c):
  2375. s = a + b
  2376. t = (b - c*np.log1p(-p))/s
  2377. return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
  2378. def _sf(self, x, a, b, c):
  2379. return np.exp((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  2380. def _isf(self, p, a, b, c):
  2381. s = a + b
  2382. t = (b - c*np.log(p))/s
  2383. return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
  2384. genexpon = genexpon_gen(a=0.0, name='genexpon')
class genextreme_gen(rv_continuous):
    r"""A generalized extreme value continuous random variable.

    %(before_notes)s

    See Also
    --------
    gumbel_r

    Notes
    -----
    For :math:`c=0`, `genextreme` is equal to `gumbel_r` with
    probability density function

    .. math::

        f(x) = \exp(-\exp(-x)) \exp(-x),

    where :math:`-\infty < x < \infty`.

    For :math:`c \ne 0`, the probability density function for `genextreme` is:

    .. math::

        f(x, c) = \exp(-(1-c x)^{1/c}) (1-c x)^{1/c-1},

    where :math:`-\infty < x \le 1/c` if :math:`c > 0` and
    :math:`1/c \le x < \infty` if :math:`c < 0`.

    Note that several sources and software packages use the opposite
    convention for the sign of the shape parameter :math:`c`.

    `genextreme` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, c):
        # Any finite shape is accepted, including zero and negative values.
        return np.isfinite(c)

    def _shape_info(self):
        return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]

    def _get_support(self, c):
        # Upper bound is 1/c for c > 0 (inf otherwise); lower bound is 1/c
        # for c < 0 (-inf otherwise).  np.where evaluates both branches, so
        # the divisor is clamped with _XMIN (presumably a tiny positive
        # float defined elsewhere in this file) to stay away from zero.
        _b = np.where(c > 0, 1.0 / np.maximum(c, _XMIN), np.inf)
        _a = np.where(c < 0, 1.0 / np.minimum(c, -_XMIN), -np.inf)
        return _a, _b

    def _loglogcdf(self, x, c):
        # Returns log(-log(cdf(x, c)))
        # log1p(-c*x)/c where x is not NaN and c != 0; -x elsewhere.
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: sc.log1p(-c*x)/c,
            fill_value=-x)

    def _pdf(self, x, c):
        # genextreme.pdf(x, c) =
        #     exp(-exp(-x))*exp(-x),                    for c==0
        #     exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1),    for x \le 1/c, c > 0
        return np.exp(self._logpdf(x, c))

    def _logpdf(self, x, c):
        # Suppress warnings 0 * inf
        # (the product c*x is only formed where x is not NaN and c != 0;
        # elsewhere it is taken to be 0)
        cx = xpx.apply_where((x == x) & (c != 0), (c, x),
                             operator.mul, fill_value=0.0)
        logex2 = sc.log1p(-cx)
        logpex2 = self._loglogcdf(x, c)
        pex2 = np.exp(logpex2)
        # Handle special cases
        np.putmask(logpex2, (c == 0) & (x == -np.inf), 0.0)
        # General case: -pex2 + logpex2 - logex2; where c*x is exactly 1 or
        # -inf the log-density is set to -inf instead.
        logpdf = xpx.apply_where(
            ~((cx == 1) | (cx == -np.inf)),
            (pex2, logpex2, logex2),
            lambda pex2, lpex2, lex2: -pex2 + lpex2 - lex2,
            fill_value=-np.inf)
        # For c == 1 at x == 1 the log-density is overridden to 0.
        np.putmask(logpdf, (c == 1) & (x == 1), 0.0)
        return logpdf

    def _logcdf(self, x, c):
        # log(cdf) = -exp(log(-log(cdf)))
        return -np.exp(self._loglogcdf(x, c))

    def _cdf(self, x, c):
        return np.exp(self._logcdf(x, c))

    def _sf(self, x, c):
        # 1 - cdf, computed from the log-cdf with expm1.
        return -sc.expm1(self._logcdf(x, c))

    def _ppf(self, q, c):
        # ``x`` is the c == 0 quantile; for c != 0 it is mapped through
        # -expm1(-c*x)/c.  NaN values of ``x`` are passed through unchanged.
        x = -np.log(-np.log(q))
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: -sc.expm1(-c * x) / c,
            fill_value=x)

    def _isf(self, q, c):
        # Same as `_ppf`, with log(q) replaced by log1p(-q).
        x = -np.log(-sc.log1p(-q))
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: -sc.expm1(-c * x) / c,
            fill_value=x)

    def _stats(self, c):
        # Returns mean, variance, skewness and kurtosis.  Near c == 0 the
        # gamma-function expressions are replaced by constant fill values.
        def g(n):
            return sc.gamma(n * c + 1)

        g1 = g(1)
        g2 = g(2)
        g3 = g(3)
        g4 = g(4)
        # g2 - g1**2, replaced by (c*pi)**2/6 for |c| < 1e-7.
        g2mg12 = np.where(abs(c) < 1e-7, (c*np.pi)**2.0/6.0, g2-g1**2.0)

        def gam2k_f(c):
            return sc.expm1(sc.gammaln(2.0*c+1.0)-2*sc.gammaln(c + 1.0))/c**2.0

        gam2k = xpx.apply_where(abs(c) >= 1e-7, c, gam2k_f, fill_value=np.pi**2.0/6.0)

        eps = 1e-14

        def gamk_f(c):
            return sc.expm1(sc.gammaln(c + 1))/c

        gamk = xpx.apply_where(abs(c) >= eps, c, gamk_f, fill_value=-_EULER)

        # mean
        m = np.where(c < -1.0, np.nan, -gamk)

        # variance
        v = np.where(c < -0.5, np.nan, g1**2.0*gam2k)

        # skewness
        # (nan for c < -1/3; a constant fill value for |c| <= eps**0.29)
        def sk1_eval(c, *args):
            def sk1_eval_f(c, g1, g2, g3, g2mg12):
                return np.sign(c)*(-g3 + (g2 + 2*g2mg12)*g1)/g2mg12**1.5

            return xpx.apply_where(c >= -1./3, (c, *args),
                                   sk1_eval_f, fill_value=np.nan)

        sk_fill = 12*np.sqrt(6)*_ZETA3/np.pi**3
        args = (g1, g2, g3, g2mg12)
        sk = xpx.apply_where(abs(c) > eps**0.29, (c, *args),
                             sk1_eval, fill_value=sk_fill)

        # kurtosis
        # (nan for c < -1/4; 12/5 for |c| <= eps**0.23)
        def ku1_eval(c, *args):
            def ku1_eval_f(g1, g2, g3, g4, g2mg12):
                return (g4 + (-4*g3 + 3*(g2 + g2mg12)*g1)*g1)/g2mg12**2 - 3

            return xpx.apply_where(c >= -1./4, args, ku1_eval_f, fill_value=np.nan)

        args = (g1, g2, g3, g4, g2mg12)
        ku = xpx.apply_where(abs(c) > eps**0.23, (c, *args),
                             ku1_eval, fill_value=12.0/5.0)

        return m, v, sk, ku

    def _fitstart(self, data):
        # Censored data is converted with `_uncensor()` before the sample
        # skewness is computed.
        if isinstance(data, CensoredData):
            data = data._uncensor()
        # This is better than the default shape of (1,).
        # The starting shape is +0.5 for negatively skewed data and -0.5
        # otherwise.
        g = _skew(data)
        if g < 0:
            a = 0.5
        else:
            a = -0.5
        return super()._fitstart(data, args=(a,))

    def _munp(self, n, c):
        # Alternating binomial sum of gamma values scaled by 1/c**n;
        # infinite unless c*n > -1.
        k = np.arange(0, n+1)
        vals = 1.0/c**n * np.sum(
            sc.comb(n, k) * (-1)**k * sc.gamma(c*k + 1),
            axis=0)
        return np.where(c*n > -1, vals, np.inf)

    def _entropy(self, c):
        return _EULER*(1 - c) + 1


genextreme = genextreme_gen(name='genextreme')
  2519. def _digammainv(y):
  2520. """Inverse of the digamma function (real positive arguments only).
  2521. This function is used in the `fit` method of `gamma_gen`.
  2522. The function uses either optimize.fsolve or optimize.newton
  2523. to solve `sc.digamma(x) - y = 0`. There is probably room for
  2524. improvement, but currently it works over a wide range of y:
  2525. >>> import numpy as np
  2526. >>> rng = np.random.default_rng()
  2527. >>> y = 64*rng.standard_normal(1000000)
  2528. >>> y.min(), y.max()
  2529. (-311.43592651416662, 351.77388222276869)
  2530. >>> x = [_digammainv(t) for t in y]
  2531. >>> np.abs(sc.digamma(x) - y).max()
  2532. 1.1368683772161603e-13
  2533. """
  2534. _em = 0.5772156649015328606065120
  2535. def func(x):
  2536. return sc.digamma(x) - y
  2537. if y > -0.125:
  2538. x0 = np.exp(y) + 0.5
  2539. if y < 10:
  2540. # Some experimentation shows that newton reliably converges
  2541. # must faster than fsolve in this y range. For larger y,
  2542. # newton sometimes fails to converge.
  2543. value = optimize.newton(func, x0, tol=1e-10)
  2544. return value
  2545. elif y > -3:
  2546. x0 = np.exp(y/2.332) + 0.08661
  2547. else:
  2548. x0 = 1.0 / (-y - _em)
  2549. value, info, ier, mesg = optimize.fsolve(func, x0, xtol=1e-11,
  2550. full_output=True)
  2551. if ier != 1:
  2552. raise RuntimeError(f"_digammainv: fsolve failed, y = {y!r}")
  2553. return value[0]
  2554. ## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)
  2555. ## gamma(a, loc, scale) with a an integer is the Erlang distribution
  2556. ## gamma(1, loc, scale) is the Exponential distribution
  2557. ## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.
  2558. class gamma_gen(rv_continuous):
  2559. r"""A gamma continuous random variable.
  2560. %(before_notes)s
  2561. See Also
  2562. --------
  2563. erlang, expon
  2564. Notes
  2565. -----
  2566. The probability density function for `gamma` is:
  2567. .. math::
  2568. f(x, a) = \frac{x^{a-1} e^{-x}}{\Gamma(a)}
  2569. for :math:`x \ge 0`, :math:`a > 0`. Here :math:`\Gamma(a)` refers to the
  2570. gamma function.
  2571. `gamma` takes ``a`` as a shape parameter for :math:`a`.
  2572. When :math:`a` is an integer, `gamma` reduces to the Erlang
  2573. distribution, and when :math:`a=1` to the exponential distribution.
  2574. Gamma distributions are sometimes parameterized with two variables,
  2575. with a probability density function of:
  2576. .. math::
  2577. f(x, \alpha, \beta) =
  2578. \frac{\beta^\alpha x^{\alpha - 1} e^{-\beta x }}{\Gamma(\alpha)}
  2579. Note that this parameterization is equivalent to the above, with
  2580. ``scale = 1 / beta``.
  2581. %(after_notes)s
  2582. %(example)s
  2583. """
  2584. def _shape_info(self):
  2585. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  2586. def _rvs(self, a, size=None, random_state=None):
  2587. return random_state.standard_gamma(a, size)
  2588. def _pdf(self, x, a):
  2589. # gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a)
  2590. return np.exp(self._logpdf(x, a))
  2591. def _logpdf(self, x, a):
  2592. return sc.xlogy(a-1.0, x) - x - sc.gammaln(a)
  2593. def _cdf(self, x, a):
  2594. return sc.gammainc(a, x)
  2595. def _sf(self, x, a):
  2596. return sc.gammaincc(a, x)
  2597. def _ppf(self, q, a):
  2598. return sc.gammaincinv(a, q)
  2599. def _isf(self, q, a):
  2600. return sc.gammainccinv(a, q)
  2601. def _stats(self, a):
  2602. return a, a, 2.0/np.sqrt(a), 6.0/a
  2603. def _munp(self, n, a):
  2604. return sc.poch(a, n)
  2605. def _entropy(self, a):
  2606. def regular_formula(a):
  2607. return sc.psi(a) * (1-a) + a + sc.gammaln(a)
  2608. def asymptotic_formula(a):
  2609. # plug in above formula the expansions:
  2610. # psi(a) ~ ln(a) - 1/2a - 1/12a^2 + 1/120a^4
  2611. # gammaln(a) ~ a * ln(a) - a - 1/2 * ln(a) + 1/2 ln(2 * pi) +
  2612. # 1/12a - 1/360a^3
  2613. return (0.5 * (1. + np.log(2*np.pi) + np.log(a)) - 1/(3 * a)
  2614. - (a**-2.)/12 - (a**-3.)/90 + (a**-4.)/120)
  2615. return xpx.apply_where(a < 250, a, regular_formula, asymptotic_formula)
  2616. def _fitstart(self, data):
  2617. # The skewness of the gamma distribution is `2 / np.sqrt(a)`.
  2618. # We invert that to estimate the shape `a` using the skewness
  2619. # of the data. The formula is regularized with 1e-8 in the
  2620. # denominator to allow for degenerate data where the skewness
  2621. # is close to 0.
  2622. if isinstance(data, CensoredData):
  2623. data = data._uncensor()
  2624. sk = _skew(data)
  2625. a = 4 / (1e-8 + sk**2)
  2626. return super()._fitstart(data, args=(a,))
    @extend_notes_in_docstring(rv_continuous, notes="""\
        When the location is fixed by using the argument `floc`
        and `method='MLE'`, this
        function uses explicit formulas or solves a simpler numerical
        problem than the full ML optimization problem. So in that case,
        the `optimizer`, `loc` and `scale` arguments are ignored.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # Overview of the dispatch below:
        #   * censored data, or (loc free and method != 'mm')
        #       -> generic fit of the parent class;
        #   * method == 'mm' -> closed-form method-of-moments estimates;
        #   * otherwise (loc fixed, MLE) -> one-dimensional root finding or
        #     a closed form, depending on which of shape/scale is fixed.
        # Returns the tuple (a, loc, scale).
        # No docstring is written here on purpose; it is presumably
        # assembled by the decorator above.
        floc = kwds.get('floc', None)
        method = kwds.get('method', 'mle')

        # NB: `and` binds tighter than `or`, so this reads
        # `censored or (floc is None and method != 'mm')`.
        if (isinstance(data, CensoredData) or
                floc is None and method.lower() != 'mm'):
            # loc is not fixed or we're not doing standard MLE.
            # Use the default fit method.
            return super().fit(data, *args, **kwds)

        # We already have this value, so just pop it from kwds.
        kwds.pop('floc', None)

        # The fixed shape may be given under any of the three aliases.
        f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
        fscale = kwds.pop('fscale', None)

        _remove_optimizer_parameters(kwds)

        if f0 is not None and floc is not None and fscale is not None:
            # This check is for consistency with `rv_continuous.fit`.
            # Without this check, this function would just return the
            # parameters that were given.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        # Fixed location is handled by shifting the data.
        data = np.asarray(data)

        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        # Use explicit formulas for mm (gh-19884)
        if method.lower() == 'mm':
            # m1: sample mean, m2: sample variance (np.var default),
            # m3: third central sample moment.
            m1 = np.mean(data)
            m2 = np.var(data)
            m3 = np.mean((data - m1) ** 3)
            a, loc, scale = f0, floc, fscale
            # The blocks below form a cascade: each one fills in a single
            # missing parameter, which turns an n-unknown problem into an
            # (n-1)-unknown one handled further down.  Their order matters.
            # Three unknowns
            if a is None and loc is None and scale is None:
                scale = m3 / (2 * m2)
            # Two unknowns
            if loc is None and scale is None:
                scale = np.sqrt(m2 / a)
            if a is None and scale is None:
                scale = m2 / (m1 - loc)
            if a is None and loc is None:
                a = m2 / (scale ** 2)
            # One unknown
            if a is None:
                a = (m1 - loc) / scale
            if loc is None:
                loc = m1 - a * scale
            if scale is None:
                scale = (m1 - loc) / a
            return a, loc, scale

        # Special case: loc is fixed.

        # NB: data == loc is ok if a >= 1; the below check is more strict.
        if np.any(data <= floc):
            raise FitDataError("gamma", lower=floc, upper=np.inf)

        if floc != 0:
            # Don't do the subtraction in-place, because `data` might be a
            # view of the input array.
            data = data - floc
        xbar = data.mean()

        # Three cases to handle:
        #   * shape and scale both free
        #   * shape fixed, scale free
        #   * shape free, scale fixed

        if fscale is None:
            # scale is free
            if f0 is not None:
                # shape is fixed
                a = f0
            else:
                # shape and scale are both free.
                # The MLE for the shape parameter `a` is the solution to:
                # np.log(a) - sc.digamma(a) - np.log(xbar) +
                #                             np.log(data).mean() = 0
                s = np.log(xbar) - np.log(data).mean()
                # `aest` is a closed-form initial estimate of the root; the
                # bracket handed to brentq is that estimate -/+ 40%.
                aest = (3-s + np.sqrt((s-3)**2 + 24*s)) / (12*s)
                xa = aest*(1-0.4)
                xb = aest*(1+0.4)
                a = optimize.brentq(lambda a: np.log(a) - sc.digamma(a) - s,
                                    xa, xb, disp=0)

            # The MLE for the scale parameter is just the data mean
            # divided by the shape parameter.
            scale = xbar / a
        else:
            # scale is fixed, shape is free
            # The MLE for the shape parameter `a` is the solution to:
            # sc.digamma(a) - np.log(data).mean() + np.log(fscale) = 0
            c = np.log(data).mean() - np.log(fscale)
            a = _digammainv(c)
            scale = fscale

        # The location is returned exactly as the caller fixed it.
        return a, floc, scale
# Module-level instance of `gamma_gen`, registered under the name 'gamma'.
# `a=0.0` is forwarded to the constructor (presumably the lower bound of the
# support -- confirm against `rv_continuous`).
gamma = gamma_gen(a=0.0, name='gamma')
  2722. class erlang_gen(gamma_gen):
  2723. """An Erlang continuous random variable.
  2724. %(before_notes)s
  2725. See Also
  2726. --------
  2727. gamma
  2728. Notes
  2729. -----
  2730. The Erlang distribution is a special case of the Gamma distribution, with
  2731. the shape parameter `a` an integer. Note that this restriction is not
  2732. enforced by `erlang`. It will, however, generate a warning the first time
  2733. a non-integer value is used for the shape parameter.
  2734. Refer to `gamma` for examples.
  2735. """
  2736. def _argcheck(self, a):
  2737. allint = np.all(np.floor(a) == a)
  2738. if not allint:
  2739. # An Erlang distribution shouldn't really have a non-integer
  2740. # shape parameter, so warn the user.
  2741. message = ('The shape parameter of the erlang distribution '
  2742. f'has been given a non-integer value {a!r}.')
  2743. warnings.warn(message, RuntimeWarning, stacklevel=3)
  2744. return a > 0
  2745. def _shape_info(self):
  2746. return [_ShapeInfo("a", True, (1, np.inf), (True, False))]
  2747. def _fitstart(self, data):
  2748. # Override gamma_gen_fitstart so that an integer initial value is
  2749. # used. (Also regularize the division, to avoid issues when
  2750. # _skew(data) is 0 or close to 0.)
  2751. if isinstance(data, CensoredData):
  2752. data = data._uncensor()
  2753. a = int(4.0 / (1e-8 + _skew(data)**2))
  2754. return super(gamma_gen, self)._fitstart(data, args=(a,))
  2755. # Trivial override of the fit method, so we can monkey-patch its
  2756. # docstring.
  2757. @extend_notes_in_docstring(rv_continuous, notes="""\
  2758. The Erlang distribution is generally defined to have integer values
  2759. for the shape parameter. This is not enforced by the `erlang` class.
  2760. When fitting the distribution, it will generally return a non-integer
  2761. value for the shape parameter. By using the keyword argument
  2762. `f0=<integer>`, the fit method can be constrained to fit the data to
  2763. a specific integer shape parameter.""")
  2764. def fit(self, data, *args, **kwds):
  2765. return super().fit(data, *args, **kwds)
  2766. erlang = erlang_gen(a=0.0, name='erlang')
  2767. class gengamma_gen(rv_continuous):
  2768. r"""A generalized gamma continuous random variable.
  2769. %(before_notes)s
  2770. See Also
  2771. --------
  2772. gamma, invgamma, weibull_min
  2773. Notes
  2774. -----
  2775. The probability density function for `gengamma` is ([1]_):
  2776. .. math::
  2777. f(x, a, c) = \frac{|c| x^{c a-1} \exp(-x^c)}{\Gamma(a)}
  2778. for :math:`x \ge 0`, :math:`a > 0`, and :math:`c \ne 0`.
  2779. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  2780. `gengamma` takes :math:`a` and :math:`c` as shape parameters.
  2781. %(after_notes)s
  2782. References
  2783. ----------
  2784. .. [1] E.W. Stacy, "A Generalization of the Gamma Distribution",
  2785. Annals of Mathematical Statistics, Vol 33(3), pp. 1187--1192.
  2786. %(example)s
  2787. """
  2788. def _argcheck(self, a, c):
  2789. return (a > 0) & (c != 0)
  2790. def _shape_info(self):
  2791. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2792. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  2793. return [ia, ic]
  2794. def _pdf(self, x, a, c):
  2795. return np.exp(self._logpdf(x, a, c))
  2796. def _logpdf(self, x, a, c):
  2797. return xpx.apply_where(
  2798. (x != 0) | (c > 0), (x, c, a),
  2799. lambda x, c, a: (np.log(abs(c)) + sc.xlogy(c*a - 1, x)
  2800. - x**c - sc.gammaln(a)),
  2801. fill_value=-np.inf)
  2802. def _cdf(self, x, a, c):
  2803. xc = x**c
  2804. val1 = sc.gammainc(a, xc)
  2805. val2 = sc.gammaincc(a, xc)
  2806. return np.where(c > 0, val1, val2)
  2807. def _rvs(self, a, c, size=None, random_state=None):
  2808. r = random_state.standard_gamma(a, size=size)
  2809. return r**(1./c)
  2810. def _sf(self, x, a, c):
  2811. xc = x**c
  2812. val1 = sc.gammainc(a, xc)
  2813. val2 = sc.gammaincc(a, xc)
  2814. return np.where(c > 0, val2, val1)
  2815. def _ppf(self, q, a, c):
  2816. val1 = sc.gammaincinv(a, q)
  2817. val2 = sc.gammainccinv(a, q)
  2818. return np.where(c > 0, val1, val2)**(1.0/c)
  2819. def _isf(self, q, a, c):
  2820. val1 = sc.gammaincinv(a, q)
  2821. val2 = sc.gammainccinv(a, q)
  2822. return np.where(c > 0, val2, val1)**(1.0/c)
  2823. def _munp(self, n, a, c):
  2824. # Pochhammer symbol: sc.pocha,n) = gamma(a+n)/gamma(a)
  2825. return sc.poch(a, n*1.0/c)
  2826. def _entropy(self, a, c):
  2827. def regular(a, c):
  2828. val = sc.psi(a)
  2829. A = a * (1 - val) + val / c
  2830. B = sc.gammaln(a) - np.log(abs(c))
  2831. h = A + B
  2832. return h
  2833. def asymptotic(a, c):
  2834. # using asymptotic expansions for gammaln and psi (see gh-18093)
  2835. return (norm._entropy() - np.log(a)/2
  2836. - np.log(np.abs(c)) + (a**-1.)/6 - (a**-3.)/90
  2837. + (np.log(a) - (a**-1.)/2 - (a**-2.)/12 + (a**-4.)/120)/c)
  2838. return xpx.apply_where(a >= 200, (a, c), asymptotic, regular)
  2839. gengamma = gengamma_gen(a=0.0, name='gengamma')
  2840. class genhalflogistic_gen(rv_continuous):
  2841. r"""A generalized half-logistic continuous random variable.
  2842. %(before_notes)s
  2843. Notes
  2844. -----
  2845. The probability density function for `genhalflogistic` is:
  2846. .. math::
  2847. f(x, c) = \frac{2 (1 - c x)^{1/(c-1)}}{[1 + (1 - c x)^{1/c}]^2}
  2848. for :math:`0 \le x \le 1/c`, and :math:`c > 0`.
  2849. `genhalflogistic` takes ``c`` as a shape parameter for :math:`c`.
  2850. %(after_notes)s
  2851. %(example)s
  2852. """
  2853. def _shape_info(self):
  2854. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2855. def _get_support(self, c):
  2856. return self.a, 1.0/c
  2857. def _pdf(self, x, c):
  2858. # genhalflogistic.pdf(x, c) =
  2859. # 2 * (1-c*x)**(1/c-1) / (1+(1-c*x)**(1/c))**2
  2860. limit = 1.0/c
  2861. tmp = np.asarray(1-c*x)
  2862. tmp0 = tmp**(limit-1)
  2863. tmp2 = tmp0*tmp
  2864. return 2*tmp0 / (1+tmp2)**2
  2865. def _cdf(self, x, c):
  2866. limit = 1.0/c
  2867. tmp = np.asarray(1-c*x)
  2868. tmp2 = tmp**(limit)
  2869. return (1.0-tmp2) / (1+tmp2)
  2870. def _ppf(self, q, c):
  2871. return 1.0/c*(1-((1.0-q)/(1.0+q))**c)
  2872. def _entropy(self, c):
  2873. return 2 - (2*c+1)*np.log(2)
  2874. genhalflogistic = genhalflogistic_gen(a=0.0, name='genhalflogistic')
  2875. class genhyperbolic_gen(rv_continuous):
  2876. r"""A generalized hyperbolic continuous random variable.
  2877. %(before_notes)s
  2878. See Also
  2879. --------
  2880. t, norminvgauss, geninvgauss, laplace, cauchy
  2881. Notes
  2882. -----
  2883. The probability density function for `genhyperbolic` is:
  2884. .. math::
  2885. f(x, p, a, b) =
  2886. \frac{(a^2 - b^2)^{p/2}}
  2887. {\sqrt{2\pi}a^{p-1/2}
  2888. K_p\Big(\sqrt{a^2 - b^2}\Big)}
  2889. e^{bx} \times \frac{K_{p - 1/2}
  2890. (a \sqrt{1 + x^2})}
  2891. {(\sqrt{1 + x^2})^{1/2 - p}}
  2892. for :math:`x, p \in ( - \infty; \infty)`,
  2893. :math:`|b| < a` if :math:`p \ge 0`,
  2894. :math:`|b| \le a` if :math:`p < 0`.
  2895. :math:`K_{p}(.)` denotes the modified Bessel function of the second
  2896. kind and order :math:`p` (`scipy.special.kv`)
  2897. `genhyperbolic` takes ``p`` as a tail parameter,
  2898. ``a`` as a shape parameter,
  2899. ``b`` as a skewness parameter.
  2900. %(after_notes)s
  2901. The original parameterization of the Generalized Hyperbolic Distribution
  2902. is found in [1]_ as follows
  2903. .. math::
  2904. f(x, \lambda, \alpha, \beta, \delta, \mu) =
  2905. \frac{(\gamma/\delta)^\lambda}{\sqrt{2\pi}K_\lambda(\delta \gamma)}
  2906. e^{\beta (x - \mu)} \times \frac{K_{\lambda - 1/2}
  2907. (\alpha \sqrt{\delta^2 + (x - \mu)^2})}
  2908. {(\sqrt{\delta^2 + (x - \mu)^2} / \alpha)^{1/2 - \lambda}}
  2909. for :math:`x \in ( - \infty; \infty)`,
  2910. :math:`\gamma := \sqrt{\alpha^2 - \beta^2}`,
  2911. :math:`\lambda, \mu \in ( - \infty; \infty)`,
  2912. :math:`\delta \ge 0, |\beta| < \alpha` if :math:`\lambda \ge 0`,
  2913. :math:`\delta > 0, |\beta| \le \alpha` if :math:`\lambda < 0`.
  2914. The location-scale-based parameterization implemented in
  2915. SciPy is based on [2]_, where :math:`a = \alpha\delta`,
  2916. :math:`b = \beta\delta`, :math:`p = \lambda`,
  2917. :math:`scale=\delta` and :math:`loc=\mu`
  2918. Moments are implemented based on [3]_ and [4]_.
  2919. For the distributions that are a special case such as Student's t,
  2920. it is not recommended to rely on the implementation of genhyperbolic.
  2921. To avoid potential numerical problems and for performance reasons,
  2922. the methods of the specific distributions should be used.
  2923. References
  2924. ----------
  2925. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions
  2926. on Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  2927. pp. 151-157, 1978. https://www.jstor.org/stable/4615705
  2928. .. [2] Eberlein E., Prause K. (2002) The Generalized Hyperbolic Model:
  2929. Financial Derivatives and Risk Measures. In: Geman H., Madan D.,
  2930. Pliska S.R., Vorst T. (eds) Mathematical Finance - Bachelier
  2931. Congress 2000. Springer Finance. Springer, Berlin, Heidelberg.
  2932. :doi:`10.1007/978-3-662-12429-1_12`
  2933. .. [3] Scott, David J, Würtz, Diethelm, Dong, Christine and Tran,
  2934. Thanh Tam, (2009), Moments of the generalized hyperbolic
  2935. distribution, MPRA Paper, University Library of Munich, Germany,
  2936. https://EconPapers.repec.org/RePEc:pra:mprapa:19081.
  2937. .. [4] E. Eberlein and E. A. von Hammerstein. Generalized hyperbolic
  2938. and inverse Gaussian distributions: Limiting cases and approximation
  2939. of processes. FDM Preprint 80, April 2003. University of Freiburg.
  2940. https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  2941. %(example)s
  2942. """
  2943. def _argcheck(self, p, a, b):
  2944. return (np.logical_and(np.abs(b) < a, p >= 0)
  2945. | np.logical_and(np.abs(b) <= a, p < 0))
  2946. def _shape_info(self):
  2947. ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
  2948. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  2949. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  2950. return [ip, ia, ib]
  2951. def _fitstart(self, data):
  2952. # Arbitrary, but the default p = a = b = 1 is not valid; the
  2953. # distribution requires |b| < a if p >= 0.
  2954. return super()._fitstart(data, args=(1, 1, 0.5))
  2955. def _logpdf(self, x, p, a, b):
  2956. # kve instead of kv works better for large values of p
  2957. # and smaller values of sqrt(a^2 - b^2)
  2958. @np.vectorize
  2959. def _logpdf_single(x, p, a, b):
  2960. return _stats.genhyperbolic_logpdf(x, p, a, b)
  2961. return _logpdf_single(x, p, a, b)
  2962. def _pdf(self, x, p, a, b):
  2963. # kve instead of kv works better for large values of p
  2964. # and smaller values of sqrt(a^2 - b^2)
  2965. @np.vectorize
  2966. def _pdf_single(x, p, a, b):
  2967. return _stats.genhyperbolic_pdf(x, p, a, b)
  2968. return _pdf_single(x, p, a, b)
  2969. # np.vectorize isn't currently designed to be used as a decorator,
  2970. # so use a lambda instead. This allows us to decorate the function
  2971. # with `np.vectorize` and still provide the `otypes` parameter.
  2972. @lambda func: np.vectorize(func, otypes=[np.float64])
  2973. @staticmethod
  2974. def _integrate_pdf(x0, x1, p, a, b):
  2975. """
  2976. Integrate the pdf of the genhyberbolic distribution from x0 to x1.
  2977. This is a private function used by _cdf() and _sf() only; either x0
  2978. will be -inf or x1 will be inf.
  2979. """
  2980. user_data = np.array([p, a, b], float).ctypes.data_as(ctypes.c_void_p)
  2981. llc = LowLevelCallable.from_cython(_stats, '_genhyperbolic_pdf',
  2982. user_data)
  2983. d = np.sqrt((a + b)*(a - b))
  2984. mean = b/d * sc.kv(p + 1, d) / sc.kv(p, d)
  2985. epsrel = 1e-10
  2986. epsabs = 0
  2987. if x0 < mean < x1:
  2988. # If the interval includes the mean, integrate over the two
  2989. # intervals [x0, mean] and [mean, x1] and add. If we try to do
  2990. # the integral in one call of quad and the non-infinite endpoint
  2991. # is far in the tail, quad might return an incorrect result
  2992. # because it does not "see" the peak of the PDF.
  2993. intgrl = (integrate.quad(llc, x0, mean,
  2994. epsrel=epsrel, epsabs=epsabs)[0]
  2995. + integrate.quad(llc, mean, x1,
  2996. epsrel=epsrel, epsabs=epsabs)[0])
  2997. else:
  2998. intgrl = integrate.quad(llc, x0, x1,
  2999. epsrel=epsrel, epsabs=epsabs)[0]
  3000. if np.isnan(intgrl):
  3001. msg = ("Infinite values encountered in scipy.special.kve. "
  3002. "Values replaced by NaN to avoid incorrect results.")
  3003. warnings.warn(msg, RuntimeWarning, stacklevel=3)
  3004. return max(0.0, min(1.0, intgrl))
  3005. def _cdf(self, x, p, a, b):
  3006. return self._integrate_pdf(-np.inf, x, p, a, b)
  3007. def _sf(self, x, p, a, b):
  3008. return self._integrate_pdf(x, np.inf, p, a, b)
  3009. def _rvs(self, p, a, b, size=None, random_state=None):
  3010. # note: X = b * V + sqrt(V) * X has a
  3011. # generalized hyperbolic distribution
  3012. # if X is standard normal and V is
  3013. # geninvgauss(p = p, b = t2, loc = loc, scale = t3)
  3014. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  3015. # b in the GIG
  3016. t2 = np.float_power(t1, 0.5)
  3017. # scale in the GIG
  3018. t3 = np.float_power(t1, - 0.5)
  3019. gig = geninvgauss.rvs(
  3020. p=p,
  3021. b=t2,
  3022. scale=t3,
  3023. size=size,
  3024. random_state=random_state
  3025. )
  3026. normst = norm.rvs(size=size, random_state=random_state)
  3027. return b * gig + np.sqrt(gig) * normst
  3028. def _stats(self, p, a, b):
  3029. # https://mpra.ub.uni-muenchen.de/19081/1/MPRA_paper_19081.pdf
  3030. # https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  3031. # standardized moments
  3032. p, a, b = np.broadcast_arrays(p, a, b)
  3033. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  3034. t1 = np.float_power(t1, 0.5)
  3035. t2 = np.float_power(1, 2) * np.float_power(t1, - 1)
  3036. integers = np.linspace(0, 4, 5)
  3037. # make integers perpendicular to existing dimensions
  3038. integers = integers.reshape(integers.shape + (1,) * p.ndim)
  3039. b0, b1, b2, b3, b4 = sc.kv(p + integers, t1)
  3040. r1, r2, r3, r4 = (b / b0 for b in (b1, b2, b3, b4))
  3041. m = b * t2 * r1
  3042. v = (
  3043. t2 * r1 + np.float_power(b, 2) * np.float_power(t2, 2) *
  3044. (r2 - np.float_power(r1, 2))
  3045. )
  3046. m3e = (
  3047. np.float_power(b, 3) * np.float_power(t2, 3) *
  3048. (r3 - 3 * b2 * b1 * np.float_power(b0, -2) +
  3049. 2 * np.float_power(r1, 3)) +
  3050. 3 * b * np.float_power(t2, 2) *
  3051. (r2 - np.float_power(r1, 2))
  3052. )
  3053. s = m3e * np.float_power(v, - 3 / 2)
  3054. m4e = (
  3055. np.float_power(b, 4) * np.float_power(t2, 4) *
  3056. (r4 - 4 * b3 * b1 * np.float_power(b0, - 2) +
  3057. 6 * b2 * np.float_power(b1, 2) * np.float_power(b0, - 3) -
  3058. 3 * np.float_power(r1, 4)) +
  3059. np.float_power(b, 2) * np.float_power(t2, 3) *
  3060. (6 * r3 - 12 * b2 * b1 * np.float_power(b0, - 2) +
  3061. 6 * np.float_power(r1, 3)) +
  3062. 3 * np.float_power(t2, 2) * r2
  3063. )
  3064. k = m4e * np.float_power(v, -2) - 3
  3065. return m, v, s, k
  3066. genhyperbolic = genhyperbolic_gen(name='genhyperbolic')
  3067. class gompertz_gen(rv_continuous):
  3068. r"""A Gompertz (or truncated Gumbel) continuous random variable.
  3069. %(before_notes)s
  3070. Notes
  3071. -----
  3072. The probability density function for `gompertz` is:
  3073. .. math::
  3074. f(x, c) = c \exp(x) \exp(-c (e^x-1))
  3075. for :math:`x \ge 0`, :math:`c > 0`.
  3076. `gompertz` takes ``c`` as a shape parameter for :math:`c`.
  3077. %(after_notes)s
  3078. %(example)s
  3079. """
  3080. def _shape_info(self):
  3081. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  3082. def _pdf(self, x, c):
  3083. # gompertz.pdf(x, c) = c * exp(x) * exp(-c*(exp(x)-1))
  3084. return np.exp(self._logpdf(x, c))
  3085. def _logpdf(self, x, c):
  3086. return np.log(c) + x - c * sc.expm1(x)
  3087. def _cdf(self, x, c):
  3088. return -sc.expm1(-c * sc.expm1(x))
  3089. def _ppf(self, q, c):
  3090. return sc.log1p(-1.0 / c * sc.log1p(-q))
  3091. def _sf(self, x, c):
  3092. return np.exp(-c * sc.expm1(x))
  3093. def _isf(self, p, c):
  3094. return sc.log1p(-np.log(p)/c)
  3095. def _entropy(self, c):
  3096. return 1.0 - np.log(c) - sc._ufuncs._scaled_exp1(c)/c
  3097. gompertz = gompertz_gen(a=0.0, name='gompertz')
  3098. def _average_with_log_weights(x, logweights):
  3099. x = np.asarray(x)
  3100. logweights = np.asarray(logweights)
  3101. maxlogw = logweights.max()
  3102. weights = np.exp(logweights - maxlogw)
  3103. return np.average(x, weights=weights)
  3104. class gumbel_r_gen(rv_continuous):
  3105. r"""A right-skewed Gumbel continuous random variable.
  3106. %(before_notes)s
  3107. See Also
  3108. --------
  3109. gumbel_l, gompertz, genextreme
  3110. Notes
  3111. -----
  3112. The probability density function for `gumbel_r` is:
  3113. .. math::
  3114. f(x) = \exp(-(x + e^{-x}))
  3115. for real :math:`x`.
  3116. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  3117. distribution. It is also related to the extreme value distribution,
  3118. log-Weibull and Gompertz distributions.
  3119. %(after_notes)s
  3120. %(example)s
  3121. """
  3122. def _shape_info(self):
  3123. return []
  3124. def _pdf(self, x):
  3125. # gumbel_r.pdf(x) = exp(-(x + exp(-x)))
  3126. return np.exp(self._logpdf(x))
  3127. def _logpdf(self, x):
  3128. return -x - np.exp(-x)
  3129. def _cdf(self, x):
  3130. return np.exp(-np.exp(-x))
  3131. def _logcdf(self, x):
  3132. return -np.exp(-x)
  3133. def _ppf(self, q):
  3134. return -np.log(-np.log(q))
  3135. def _sf(self, x):
  3136. return -sc.expm1(-np.exp(-x))
  3137. def _isf(self, p):
  3138. return -np.log(-np.log1p(-p))
  3139. def _stats(self):
  3140. return _EULER, np.pi*np.pi/6.0, 12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  3141. def _entropy(self):
  3142. # https://en.wikipedia.org/wiki/Gumbel_distribution
  3143. return _EULER + 1.
  3144. @_call_super_mom
  3145. @inherit_docstring_from(rv_continuous)
  3146. def fit(self, data, *args, **kwds):
  3147. data, floc, fscale = _check_fit_input_parameters(self, data,
  3148. args, kwds)
  3149. # By the method of maximum likelihood, the estimators of the
  3150. # location and scale are the roots of the equations defined in
  3151. # `func` and the value of the expression for `loc` that follows.
  3152. # The first `func` is a first order derivative of the log-likelihood
  3153. # equation and the second is from Source: Statistical Distributions,
  3154. # 3rd Edition. Evans, Hastings, and Peacock (2000), Page 101.
  3155. def get_loc_from_scale(scale):
  3156. return -scale * (sc.logsumexp(-data / scale) - np.log(len(data)))
  3157. if fscale is not None:
  3158. # if the scale is fixed, the location can be analytically
  3159. # determined.
  3160. scale = fscale
  3161. loc = get_loc_from_scale(scale)
  3162. else:
  3163. # A different function is solved depending on whether the location
  3164. # is fixed.
  3165. if floc is not None:
  3166. loc = floc
  3167. # equation to use if the location is fixed.
  3168. # note that one cannot use the equation in Evans, Hastings,
  3169. # and Peacock (2000) (since it assumes that the derivative
  3170. # w.r.t. the log-likelihood is zero). however, it is easy to
  3171. # derive the MLE condition directly if loc is fixed
  3172. def func(scale):
  3173. term1 = (loc - data) * np.exp((loc - data) / scale) + data
  3174. term2 = len(data) * (loc + scale)
  3175. return term1.sum() - term2
  3176. else:
  3177. # equation to use if both location and scale are free
  3178. def func(scale):
  3179. sdata = -data / scale
  3180. wavg = _average_with_log_weights(data, logweights=sdata)
  3181. return data.mean() - wavg - scale
  3182. # set brackets for `root_scalar` to use when optimizing over the
  3183. # scale such that a root is likely between them. Use user supplied
  3184. # guess or default 1.
  3185. brack_start = kwds.get('scale', 1)
  3186. lbrack, rbrack = brack_start / 2, brack_start * 2
  3187. # if a root is not between the brackets, iteratively expand them
  3188. # until they include a sign change, checking after each bracket is
  3189. # modified.
  3190. def interval_contains_root(lbrack, rbrack):
  3191. # return true if the signs disagree.
  3192. return (np.sign(func(lbrack)) !=
  3193. np.sign(func(rbrack)))
  3194. while (not interval_contains_root(lbrack, rbrack)
  3195. and (lbrack > 0 or rbrack < np.inf)):
  3196. lbrack /= 2
  3197. rbrack *= 2
  3198. res = optimize.root_scalar(func, bracket=(lbrack, rbrack),
  3199. rtol=1e-14, xtol=1e-14)
  3200. scale = res.root
  3201. loc = floc if floc is not None else get_loc_from_scale(scale)
  3202. return loc, scale
  3203. gumbel_r = gumbel_r_gen(name='gumbel_r')
  3204. class gumbel_l_gen(rv_continuous):
  3205. r"""A left-skewed Gumbel continuous random variable.
  3206. %(before_notes)s
  3207. See Also
  3208. --------
  3209. gumbel_r, gompertz, genextreme
  3210. Notes
  3211. -----
  3212. The probability density function for `gumbel_l` is:
  3213. .. math::
  3214. f(x) = \exp(x - e^x)
  3215. for real :math:`x`.
  3216. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  3217. distribution. It is also related to the extreme value distribution,
  3218. log-Weibull and Gompertz distributions.
  3219. %(after_notes)s
  3220. %(example)s
  3221. """
  3222. def _shape_info(self):
  3223. return []
  3224. def _pdf(self, x):
  3225. # gumbel_l.pdf(x) = exp(x - exp(x))
  3226. return np.exp(self._logpdf(x))
  3227. def _logpdf(self, x):
  3228. return x - np.exp(x)
  3229. def _cdf(self, x):
  3230. return -sc.expm1(-np.exp(x))
  3231. def _ppf(self, q):
  3232. return np.log(-sc.log1p(-q))
  3233. def _logsf(self, x):
  3234. return -np.exp(x)
  3235. def _sf(self, x):
  3236. return np.exp(-np.exp(x))
  3237. def _isf(self, x):
  3238. return np.log(-np.log(x))
  3239. def _stats(self):
  3240. return -_EULER, np.pi*np.pi/6.0, \
  3241. -12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  3242. def _entropy(self):
  3243. return _EULER + 1.
  3244. @_call_super_mom
  3245. @inherit_docstring_from(rv_continuous)
  3246. def fit(self, data, *args, **kwds):
  3247. # The fit method of `gumbel_r` can be used for this distribution with
  3248. # small modifications. The process to do this is
  3249. # 1. pass the sign negated data into `gumbel_r.fit`
  3250. # - if the location is fixed, it should also be negated.
  3251. # 2. negate the sign of the resulting location, leaving the scale
  3252. # unmodified.
  3253. # `gumbel_r.fit` holds necessary input checks.
  3254. if kwds.get('floc') is not None:
  3255. kwds['floc'] = -kwds['floc']
  3256. loc_r, scale_r, = gumbel_r.fit(-np.asarray(data), *args, **kwds)
  3257. return -loc_r, scale_r
  3258. gumbel_l = gumbel_l_gen(name='gumbel_l')
  3259. class halfcauchy_gen(rv_continuous):
  3260. r"""A Half-Cauchy continuous random variable.
  3261. %(before_notes)s
  3262. Notes
  3263. -----
  3264. The probability density function for `halfcauchy` is:
  3265. .. math::
  3266. f(x) = \frac{2}{\pi (1 + x^2)}
  3267. for :math:`x \ge 0`.
  3268. %(after_notes)s
  3269. %(example)s
  3270. """
  3271. def _shape_info(self):
  3272. return []
  3273. def _pdf(self, x):
  3274. # halfcauchy.pdf(x) = 2 / (pi * (1 + x**2))
  3275. return 2.0/np.pi/(1.0+x*x)
  3276. def _logpdf(self, x):
  3277. return np.log(2.0/np.pi) - sc.log1p(x*x)
  3278. def _cdf(self, x):
  3279. return 2.0/np.pi*np.arctan(x)
  3280. def _ppf(self, q):
  3281. return np.tan(np.pi/2*q)
  3282. def _sf(self, x):
  3283. return 2.0/np.pi * np.arctan2(1, x)
  3284. def _isf(self, p):
  3285. return 1.0/np.tan(np.pi*p/2)
  3286. def _stats(self):
  3287. return np.inf, np.inf, np.nan, np.nan
  3288. def _entropy(self):
  3289. return np.log(2*np.pi)
  3290. @_call_super_mom
  3291. @inherit_docstring_from(rv_continuous)
  3292. def fit(self, data, *args, **kwds):
  3293. if kwds.pop('superfit', False):
  3294. return super().fit(data, *args, **kwds)
  3295. data, floc, fscale = _check_fit_input_parameters(self, data,
  3296. args, kwds)
  3297. # location is independent from the scale
  3298. data_min = np.min(data)
  3299. if floc is not None:
  3300. if data_min < floc:
  3301. # There are values that are less than the specified loc.
  3302. raise FitDataError("halfcauchy", lower=floc, upper=np.inf)
  3303. loc = floc
  3304. else:
  3305. # if not provided, location MLE is the minimal data point
  3306. loc = data_min
  3307. # find scale
  3308. def find_scale(loc, data):
  3309. shifted_data = data - loc
  3310. n = data.size
  3311. shifted_data_squared = np.square(shifted_data)
  3312. def fun_to_solve(scale):
  3313. denominator = scale**2 + shifted_data_squared
  3314. return 2 * np.sum(shifted_data_squared/denominator) - n
  3315. small = np.finfo(1.0).tiny**0.5 # avoid underflow
  3316. res = root_scalar(fun_to_solve, bracket=(small, np.max(shifted_data)))
  3317. return res.root
  3318. if fscale is not None:
  3319. scale = fscale
  3320. else:
  3321. scale = find_scale(loc, data)
  3322. return loc, scale
  3323. halfcauchy = halfcauchy_gen(a=0.0, name='halfcauchy')
  3324. class halflogistic_gen(rv_continuous):
  3325. r"""A half-logistic continuous random variable.
  3326. %(before_notes)s
  3327. Notes
  3328. -----
  3329. The probability density function for `halflogistic` is:
  3330. .. math::
  3331. f(x) = \frac{ 2 e^{-x} }{ (1+e^{-x})^2 }
  3332. = \frac{1}{2} \text{sech}(x/2)^2
  3333. for :math:`x \ge 0`.
  3334. %(after_notes)s
  3335. References
  3336. ----------
  3337. .. [1] Asgharzadeh et al (2011). "Comparisons of Methods of Estimation for the
  3338. Half-Logistic Distribution". Selcuk J. Appl. Math. 93-108.
  3339. %(example)s
  3340. """
  3341. def _shape_info(self):
  3342. return []
  3343. def _pdf(self, x):
  3344. # halflogistic.pdf(x) = 2 * exp(-x) / (1+exp(-x))**2
  3345. # = 1/2 * sech(x/2)**2
  3346. return np.exp(self._logpdf(x))
  3347. def _logpdf(self, x):
  3348. return np.log(2) - x - 2. * sc.log1p(np.exp(-x))
  3349. def _cdf(self, x):
  3350. return np.tanh(x/2.0)
  3351. def _ppf(self, q):
  3352. return 2*np.arctanh(q)
  3353. def _sf(self, x):
  3354. return 2 * sc.expit(-x)
  3355. def _isf(self, q):
  3356. return xpx.apply_where(q < 0.5, q,
  3357. lambda q: -sc.logit(0.5 * q),
  3358. lambda q: 2*np.arctanh(1 - q))
  3359. def _munp(self, n):
  3360. if n == 0:
  3361. return 1 # otherwise returns NaN
  3362. if n == 1:
  3363. return 2*np.log(2)
  3364. if n == 2:
  3365. return np.pi*np.pi/3.0
  3366. if n == 3:
  3367. return 9*_ZETA3
  3368. if n == 4:
  3369. return 7*np.pi**4 / 15.0
  3370. return 2*(1-pow(2.0, 1-n))*sc.gamma(n+1)*sc.zeta(n, 1)
  3371. def _entropy(self):
  3372. return 2-np.log(2)
  3373. @_call_super_mom
  3374. @inherit_docstring_from(rv_continuous)
  3375. def fit(self, data, *args, **kwds):
  3376. if kwds.pop('superfit', False):
  3377. return super().fit(data, *args, **kwds)
  3378. data, floc, fscale = _check_fit_input_parameters(self, data,
  3379. args, kwds)
  3380. def find_scale(data, loc):
  3381. # scale is solution to a fix point problem ([1] 2.6)
  3382. # use approximate MLE as starting point ([1] 3.1)
  3383. n_observations = data.shape[0]
  3384. sorted_data = np.sort(data, axis=0)
  3385. p = np.arange(1, n_observations + 1)/(n_observations + 1)
  3386. q = 1 - p
  3387. pp1 = 1 + p
  3388. alpha = p - 0.5 * q * pp1 * np.log(pp1 / q)
  3389. beta = 0.5 * q * pp1
  3390. sorted_data = sorted_data - loc
  3391. B = 2 * np.sum(alpha[1:] * sorted_data[1:])
  3392. C = 2 * np.sum(beta[1:] * sorted_data[1:]**2)
  3393. # starting guess
  3394. scale = ((B + np.sqrt(B**2 + 8 * n_observations * C))
  3395. /(4 * n_observations))
  3396. # relative tolerance of fix point iterator
  3397. rtol = 1e-8
  3398. relative_residual = 1
  3399. shifted_mean = sorted_data.mean() # y_mean - y_min
  3400. # find fix point by repeated application of eq. (2.6)
  3401. # simplify as
  3402. # exp(-x) / (1 + exp(-x)) = 1 / (1 + exp(x))
  3403. # = expit(-x))
  3404. while relative_residual > rtol:
  3405. sum_term = sorted_data * sc.expit(-sorted_data/scale)
  3406. scale_new = shifted_mean - 2/n_observations * sum_term.sum()
  3407. relative_residual = abs((scale - scale_new)/scale)
  3408. scale = scale_new
  3409. return scale
  3410. # location is independent from the scale
  3411. data_min = np.min(data)
  3412. if floc is not None:
  3413. if data_min < floc:
  3414. # There are values that are less than the specified loc.
  3415. raise FitDataError("halflogistic", lower=floc, upper=np.inf)
  3416. loc = floc
  3417. else:
  3418. # if not provided, location MLE is the minimal data point
  3419. loc = data_min
  3420. # scale depends on location
  3421. scale = fscale if fscale is not None else find_scale(data, loc)
  3422. return loc, scale
  3423. halflogistic = halflogistic_gen(a=0.0, name='halflogistic')
  3424. class halfnorm_gen(rv_continuous):
  3425. r"""A half-normal continuous random variable.
  3426. %(before_notes)s
  3427. Notes
  3428. -----
  3429. The probability density function for `halfnorm` is:
  3430. .. math::
  3431. f(x) = \sqrt{2/\pi} \exp(-x^2 / 2)
  3432. for :math:`x >= 0`.
  3433. `halfnorm` is a special case of `chi` with ``df=1``.
  3434. %(after_notes)s
  3435. %(example)s
  3436. """
  3437. def _shape_info(self):
  3438. return []
  3439. def _rvs(self, size=None, random_state=None):
  3440. return abs(random_state.standard_normal(size=size))
  3441. def _pdf(self, x):
  3442. # halfnorm.pdf(x) = sqrt(2/pi) * exp(-x**2/2)
  3443. return np.sqrt(2.0/np.pi)*np.exp(-x*x/2.0)
  3444. def _logpdf(self, x):
  3445. return 0.5 * np.log(2.0/np.pi) - x*x/2.0
  3446. def _cdf(self, x):
  3447. return sc.erf(x / np.sqrt(2))
  3448. def _ppf(self, q):
  3449. return _norm_ppf((1+q)/2.0)
  3450. def _sf(self, x):
  3451. return 2 * _norm_sf(x)
  3452. def _isf(self, p):
  3453. return _norm_isf(p/2)
  3454. def _stats(self):
  3455. return (np.sqrt(2.0/np.pi),
  3456. 1-2.0/np.pi,
  3457. np.sqrt(2)*(4-np.pi)/(np.pi-2)**1.5,
  3458. 8*(np.pi-3)/(np.pi-2)**2)
  3459. def _entropy(self):
  3460. return 0.5*np.log(np.pi/2.0)+0.5
  3461. @_call_super_mom
  3462. @inherit_docstring_from(rv_continuous)
  3463. def fit(self, data, *args, **kwds):
  3464. if kwds.pop('superfit', False):
  3465. return super().fit(data, *args, **kwds)
  3466. data, floc, fscale = _check_fit_input_parameters(self, data,
  3467. args, kwds)
  3468. data_min = np.min(data)
  3469. if floc is not None:
  3470. if data_min < floc:
  3471. # There are values that are less than the specified loc.
  3472. raise FitDataError("halfnorm", lower=floc, upper=np.inf)
  3473. loc = floc
  3474. else:
  3475. loc = data_min
  3476. if fscale is not None:
  3477. scale = fscale
  3478. else:
  3479. scale = stats.moment(data, order=2, center=loc)**0.5
  3480. return loc, scale
  3481. halfnorm = halfnorm_gen(a=0.0, name='halfnorm')
  3482. class hypsecant_gen(rv_continuous):
  3483. r"""A hyperbolic secant continuous random variable.
  3484. %(before_notes)s
  3485. Notes
  3486. -----
  3487. The probability density function for `hypsecant` is:
  3488. .. math::
  3489. f(x) = \frac{1}{\pi} \text{sech}(x)
  3490. for a real number :math:`x`.
  3491. %(after_notes)s
  3492. %(example)s
  3493. """
  3494. def _shape_info(self):
  3495. return []
  3496. def _pdf(self, x):
  3497. # hypsecant.pdf(x) = 1/pi * sech(x)
  3498. return 1.0/(np.pi*np.cosh(x))
  3499. def _cdf(self, x):
  3500. return 2.0/np.pi*np.arctan(np.exp(x))
  3501. def _ppf(self, q):
  3502. return np.log(np.tan(np.pi*q/2.0))
  3503. def _sf(self, x):
  3504. return 2.0/np.pi*np.arctan(np.exp(-x))
  3505. def _isf(self, q):
  3506. return -np.log(np.tan(np.pi*q/2.0))
  3507. def _stats(self):
  3508. return 0, np.pi*np.pi/4, 0, 2
  3509. def _entropy(self):
  3510. return np.log(2*np.pi)
  3511. hypsecant = hypsecant_gen(name='hypsecant')
  3512. class gausshyper_gen(rv_continuous):
  3513. r"""A Gauss hypergeometric continuous random variable.
  3514. %(before_notes)s
  3515. Notes
  3516. -----
  3517. The probability density function for `gausshyper` is:
  3518. .. math::
  3519. f(x, a, b, c, z) = C x^{a-1} (1-x)^{b-1} (1+zx)^{-c}
  3520. for :math:`0 \le x \le 1`, :math:`a,b > 0`, :math:`c` a real number,
  3521. :math:`z > -1`, and :math:`C = \frac{1}{B(a, b) F[2, 1](c, a; a+b; -z)}`.
  3522. :math:`F[2, 1]` is the Gauss hypergeometric function
  3523. `scipy.special.hyp2f1`.
  3524. `gausshyper` takes :math:`a`, :math:`b`, :math:`c` and :math:`z` as shape
  3525. parameters.
  3526. %(after_notes)s
  3527. References
  3528. ----------
  3529. .. [1] Armero, C., and M. J. Bayarri. "Prior Assessments for Prediction in
  3530. Queues." *Journal of the Royal Statistical Society*. Series D (The
  3531. Statistician) 43, no. 1 (1994): 139-53. doi:10.2307/2348939
  3532. %(example)s
  3533. """
  3534. def _argcheck(self, a, b, c, z):
  3535. # z > -1 per gh-10134
  3536. return (a > 0) & (b > 0) & (c == c) & (z > -1)
  3537. def _shape_info(self):
  3538. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  3539. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  3540. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  3541. iz = _ShapeInfo("z", False, (-1, np.inf), (False, False))
  3542. return [ia, ib, ic, iz]
  3543. def _pdf(self, x, a, b, c, z):
  3544. normalization_constant = sc.beta(a, b) * sc.hyp2f1(c, a, a + b, -z)
  3545. return (1./normalization_constant * x**(a - 1.) * (1. - x)**(b - 1.0)
  3546. / (1.0 + z*x)**c)
  3547. def _munp(self, n, a, b, c, z):
  3548. fac = sc.beta(n+a, b) / sc.beta(a, b)
  3549. num = sc.hyp2f1(c, a+n, a+b+n, -z)
  3550. den = sc.hyp2f1(c, a, a+b, -z)
  3551. return fac*num / den
  3552. gausshyper = gausshyper_gen(a=0.0, b=1.0, name='gausshyper')
  3553. class invgamma_gen(rv_continuous):
  3554. r"""An inverted gamma continuous random variable.
  3555. %(before_notes)s
  3556. Notes
  3557. -----
  3558. The probability density function for `invgamma` is:
  3559. .. math::
  3560. f(x, a) = \frac{x^{-a-1}}{\Gamma(a)} \exp(-\frac{1}{x})
  3561. for :math:`x >= 0`, :math:`a > 0`. :math:`\Gamma` is the gamma function
  3562. (`scipy.special.gamma`).
  3563. `invgamma` takes ``a`` as a shape parameter for :math:`a`.
  3564. `invgamma` is a special case of `gengamma` with ``c=-1``, and it is a
  3565. different parameterization of the scaled inverse chi-squared distribution.
  3566. Specifically, if the scaled inverse chi-squared distribution is
  3567. parameterized with degrees of freedom :math:`\nu` and scaling parameter
  3568. :math:`\tau^2`, then it can be modeled using `invgamma` with
  3569. ``a=`` :math:`\nu/2` and ``scale=`` :math:`\nu \tau^2/2`.
  3570. %(after_notes)s
  3571. %(example)s
  3572. """
  3573. _support_mask = rv_continuous._open_support_mask
  3574. def _shape_info(self):
  3575. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  3576. def _pdf(self, x, a):
  3577. # invgamma.pdf(x, a) = x**(-a-1) / gamma(a) * exp(-1/x)
  3578. return np.exp(self._logpdf(x, a))
  3579. def _logpdf(self, x, a):
  3580. return -(a+1) * np.log(x) - sc.gammaln(a) - 1.0/x
  3581. def _cdf(self, x, a):
  3582. return sc.gammaincc(a, 1.0 / x)
  3583. def _ppf(self, q, a):
  3584. return 1.0 / sc.gammainccinv(a, q)
  3585. def _sf(self, x, a):
  3586. return sc.gammainc(a, 1.0 / x)
  3587. def _isf(self, q, a):
  3588. return 1.0 / sc.gammaincinv(a, q)
  3589. def _stats(self, a, moments='mvsk'):
  3590. m1 = xpx.apply_where(a > 1, a,
  3591. lambda x: 1. / (x - 1.),
  3592. fill_value=np.inf)
  3593. m2 = xpx.apply_where(a > 2, a,
  3594. lambda x: 1. / (x - 1.)**2 / (x - 2.),
  3595. fill_value=np.inf)
  3596. g1, g2 = None, None
  3597. if 's' in moments:
  3598. g1 = xpx.apply_where(a > 3, a,
  3599. lambda x: 4. * np.sqrt(x - 2.) / (x - 3.),
  3600. fill_value=np.nan)
  3601. if 'k' in moments:
  3602. g2 = xpx.apply_where(a > 4, a,
  3603. lambda x: 6. * (5. * x - 11.) / (x - 3.) / (x - 4.),
  3604. fill_value=np.nan)
  3605. return m1, m2, g1, g2
  3606. def _entropy(self, a):
  3607. def regular(a):
  3608. h = a - (a + 1.0) * sc.psi(a) + sc.gammaln(a)
  3609. return h
  3610. def asymptotic(a):
  3611. # gammaln(a) ~ a * ln(a) - a - 0.5 * ln(a) + 0.5 * ln(2 * pi)
  3612. # psi(a) ~ ln(a) - 1 / (2 * a)
  3613. h = ((1 - 3*np.log(a) + np.log(2) + np.log(np.pi))/2
  3614. + 2/3*a**-1. + a**-2./12 - a**-3./90 - a**-4./120)
  3615. return h
  3616. h = xpx.apply_where(a >= 200, a, asymptotic, regular)
  3617. return h
  3618. invgamma = invgamma_gen(a=0.0, name='invgamma')
  3619. class invgauss_gen(rv_continuous):
  3620. r"""An inverse Gaussian continuous random variable.
  3621. %(before_notes)s
  3622. Notes
  3623. -----
  3624. The probability density function for `invgauss` is:
  3625. .. math::
  3626. f(x; \mu) = \frac{1}{\sqrt{2 \pi x^3}}
  3627. \exp\left(-\frac{(x-\mu)^2}{2 \mu^2 x}\right)
  3628. for :math:`x \ge 0` and :math:`\mu > 0`.
  3629. `invgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
  3630. %(after_notes)s
  3631. A common shape-scale parameterization of the inverse Gaussian distribution
  3632. has density
  3633. .. math::
  3634. f(x; \nu, \lambda) = \sqrt{\frac{\lambda}{2 \pi x^3}}
  3635. \exp\left( -\frac{\lambda(x-\nu)^2}{2 \nu^2 x}\right)
  3636. Using ``nu`` for :math:`\nu` and ``lam`` for :math:`\lambda`, this
  3637. parameterization is equivalent to the one above with ``mu = nu/lam``,
  3638. ``loc = 0``, and ``scale = lam``.
  3639. This distribution uses routines from the Boost Math C++ library for
  3640. the computation of the ``ppf`` and ``isf`` methods. [1]_
  3641. References
  3642. ----------
  3643. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  3644. %(example)s
  3645. """
  3646. _support_mask = rv_continuous._open_support_mask
  3647. def _shape_info(self):
  3648. return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
  3649. def _rvs(self, mu, size=None, random_state=None):
  3650. return random_state.wald(mu, 1.0, size=size)
  3651. def _pdf(self, x, mu):
  3652. # invgauss.pdf(x, mu) =
  3653. # 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
  3654. return 1.0/np.sqrt(2*np.pi*x**3.0)*np.exp(-1.0/(2*x)*(x/mu - 1)**2)
  3655. def _logpdf(self, x, mu):
  3656. return -0.5*np.log(2*np.pi) - 1.5*np.log(x) - (x/mu - 1)**2/(2*x)
  3657. # approach adapted from equations in
  3658. # https://journal.r-project.org/archive/2016-1/giner-smyth.pdf,
  3659. # not R code. see gh-13616
  3660. def _logcdf(self, x, mu):
  3661. fac = 1 / np.sqrt(x)
  3662. a = _norm_logcdf(fac * (x/mu - 1))
  3663. b = 2 / mu + _norm_logcdf(-fac * (x/mu + 1))
  3664. return a + np.log1p(np.exp(b - a))
  3665. def _logsf(self, x, mu):
  3666. fac = 1 / np.sqrt(x)
  3667. a = _norm_logsf(fac * (x/mu - 1))
  3668. b = 2 / mu + _norm_logcdf(-fac * (x/mu + 1))
  3669. return a + np.log1p(-np.exp(b - a))
  3670. def _sf(self, x, mu):
  3671. return np.exp(self._logsf(x, mu))
  3672. def _cdf(self, x, mu):
  3673. return np.exp(self._logcdf(x, mu))
  3674. def _ppf(self, x, mu):
  3675. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  3676. x, mu = np.broadcast_arrays(x, mu)
  3677. ppf = np.asarray(scu._invgauss_ppf(x, mu, 1))
  3678. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  3679. ppf[i_wt] = scu._invgauss_isf(1-x[i_wt], mu[i_wt], 1)
  3680. i_nan = np.isnan(ppf)
  3681. ppf[i_nan] = super()._ppf(x[i_nan], mu[i_nan])
  3682. return ppf
  3683. def _isf(self, x, mu):
  3684. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  3685. x, mu = np.broadcast_arrays(x, mu)
  3686. isf = scu._invgauss_isf(x, mu, 1)
  3687. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  3688. isf[i_wt] = scu._invgauss_ppf(1-x[i_wt], mu[i_wt], 1)
  3689. i_nan = np.isnan(isf)
  3690. isf[i_nan] = super()._isf(x[i_nan], mu[i_nan])
  3691. return isf
  3692. def _stats(self, mu):
  3693. return mu, mu**3.0, 3*np.sqrt(mu), 15*mu
  3694. @inherit_docstring_from(rv_continuous)
  3695. def fit(self, data, *args, **kwds):
  3696. method = kwds.get('method', 'mle')
  3697. if (isinstance(data, CensoredData) or isinstance(self, wald_gen)
  3698. or method.lower() == 'mm'):
  3699. return super().fit(data, *args, **kwds)
  3700. data, fshape_s, floc, fscale = _check_fit_input_parameters(self, data,
  3701. args, kwds)
  3702. '''
  3703. Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  3704. and Peacock (2000), Page 121. Their shape parameter is equivalent to
  3705. SciPy's with the conversion `fshape_s = fshape / scale`.
  3706. MLE formulas are not used in 3 conditions:
  3707. - `loc` is not fixed
  3708. - `mu` is fixed
  3709. These cases fall back on the superclass fit method.
  3710. - `loc` is fixed but translation results in negative data raises
  3711. a `FitDataError`.
  3712. '''
  3713. if floc is None or fshape_s is not None:
  3714. return super().fit(data, *args, **kwds)
  3715. elif np.any(data - floc < 0):
  3716. raise FitDataError("invgauss", lower=0, upper=np.inf)
  3717. else:
  3718. data = data - floc
  3719. fshape_n = np.mean(data)
  3720. if fscale is None:
  3721. fscale = len(data) / (np.sum(data ** -1 - fshape_n ** -1))
  3722. fshape_s = fshape_n / fscale
  3723. return fshape_s, floc, fscale
  3724. def _entropy(self, mu):
  3725. """
  3726. Ref.: https://moser-isi.ethz.ch/docs/papers/smos-2012-10.pdf (eq. 9)
  3727. """
  3728. # a = log(2*pi*e*mu**3)
  3729. # = 1 + log(2*pi) + 3 * log(mu)
  3730. a = 1. + np.log(2 * np.pi) + 3 * np.log(mu)
  3731. # b = exp(2/mu) * exp1(2/mu)
  3732. # = _scaled_exp1(2/mu) / (2/mu)
  3733. r = 2/mu
  3734. b = sc._ufuncs._scaled_exp1(r)/r
  3735. return 0.5 * a - 1.5 * b
  3736. invgauss = invgauss_gen(a=0.0, name='invgauss')
class geninvgauss_gen(rv_continuous):
    r"""A Generalized Inverse Gaussian continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `geninvgauss` is:

    .. math::

        f(x, p, b) = x^{p-1} \exp(-b (x + 1/x) / 2) / (2 K_p(b))

    where ``x > 0``, `p` is a real number and ``b > 0``\([1]_).
    :math:`K_p` is the modified Bessel function of second kind of order `p`
    (`scipy.special.kv`).

    %(after_notes)s

    The inverse Gaussian distribution `stats.invgauss(mu)` is a special case of
    `geninvgauss` with ``p = -1/2``, ``b = 1 / mu`` and ``scale = mu``.

    Generating random variates is challenging for this distribution. The
    implementation is based on [2]_.

    References
    ----------
    .. [1] O. Barndorff-Nielsen, P. Blaesild, C. Halgreen, "First hitting time
       models for the generalized inverse gaussian distribution",
       Stochastic Processes and their Applications 7, pp. 49--54, 1978.

    .. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
       random variates", Statistics and Computing, 24(4), p. 547--557, 2014.

    %(example)s

    """

    def _argcheck(self, p, b):
        # `p == p` is False only where p is NaN; b must be positive
        return (p == p) & (b > 0)

    def _shape_info(self):
        ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [ip, ib]

    def _logpdf(self, x, p, b):
        # kve instead of kv works better for large values of b
        # warn if kve produces infinite values and replace by nan
        # otherwise c = -inf and the results are often incorrect
        def logpdf_single(x, p, b):
            return _stats.geninvgauss_logpdf(x, p, b)

        # apply the scalar routine elementwise over broadcast inputs
        logpdf_single = np.vectorize(logpdf_single, otypes=[np.float64])

        z = logpdf_single(x, p, b)
        if np.isnan(z).any():
            msg = ("Infinite values encountered in scipy.special.kve(p, b). "
                   "Values replaced by NaN to avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning, stacklevel=3)
        return z

    def _pdf(self, x, p, b):
        # relying on logpdf avoids overflow of x**(p-1) for large x and p
        return np.exp(self._logpdf(x, p, b))

    def _cdf(self, x, p, b):
        # Numerically integrate the low-level '_geninvgauss_pdf' callable
        # from `_a` up to x, one element at a time.
        # NOTE(review): `_a` is presumably the lower end of the support as
        # returned by `_get_support` -- confirm against the base class.
        _a, _b = self._get_support(p, b)

        def _cdf_single(x, p, b):
            # the shape parameters are handed to the callable as a raw
            # pointer to a two-element float array
            user_data = np.array([p, b], float).ctypes.data_as(ctypes.c_void_p)
            llc = LowLevelCallable.from_cython(_stats, '_geninvgauss_pdf',
                                               user_data)

            return integrate.quad(llc, _a, x)[0]

        _cdf_single = np.vectorize(_cdf_single, otypes=[np.float64])

        return _cdf_single(x, p, b)

    def _logquasipdf(self, x, p, b):
        # log of the quasi-density (w/o normalizing constant) used in _rvs
        # (-inf, i.e. zero quasi-density, wherever x <= 0)
        return xpx.apply_where(x > 0, (x, p, b),
                               lambda x, p, b: (p - 1)*np.log(x) - b*(x + 1/x)/2,
                               fill_value=-np.inf)

    def _rvs(self, p, b, size=None, random_state=None):
        # if p and b are scalar, use _rvs_scalar, otherwise need to create
        # output by iterating over parameters
        if np.isscalar(p) and np.isscalar(b):
            out = self._rvs_scalar(p, b, size, random_state)
        elif p.size == 1 and b.size == 1:
            out = self._rvs_scalar(p.item(), b.item(), size, random_state)
        else:
            # When this method is called, size will be a (possibly empty)
            # tuple of integers.  It will not be None; if `size=None` is passed
            # to `rvs()`, size will be the empty tuple ().

            p, b = np.broadcast_arrays(p, b)
            # p and b now have the same shape.

            # `shp` is the shape of the blocks of random variates that are
            # generated for each combination of parameters associated with
            # broadcasting p and b.
            # bc is a tuple the same length as size.  The values
            # in bc are bools.  If bc[j] is True, it means that
            # entire axis is filled in for a given combination of the
            # broadcast arguments.
            shp, bc = _check_shape(p.shape, size)

            # `numsamples` is the total number of variates to be generated
            # for each combination of the input arguments.
            numsamples = int(np.prod(shp))

            # `out` is the array to be returned.  It is filled in the
            # loop below.
            out = np.empty(size)

            it = np.nditer([p, b],
                           flags=['multi_index'],
                           op_flags=[['readonly'], ['readonly']])
            while not it.finished:
                # Convert the iterator's multi_index into an index into the
                # `out` array where the call to _rvs_scalar() will be stored.
                # Where bc is True, we use a full slice; otherwise we use the
                # index value from it.multi_index.  len(it.multi_index) might
                # be less than len(bc), and in that case we want to align these
                # two sequences to the right, so the loop variable j runs from
                # -len(size) to 0.  This doesn't cause an IndexError, as
                # bc[j] will be True in those cases where it.multi_index[j]
                # would cause an IndexError.
                idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
                            for j in range(-len(size), 0))
                out[idx] = self._rvs_scalar(it[0], it[1], numsamples,
                                            random_state).reshape(shp)
                it.iternext()

        # an empty `size` tuple means a single variate: unwrap it to a scalar
        if size == ():
            out = out.item()
        return out

    def _rvs_scalar(self, p, b, numsamples, random_state):
        # following [2], the quasi-pdf is used instead of the pdf for the
        # generation of rvs
        invert_res = False
        # a falsy `numsamples` (e.g. None or an empty tuple) means one draw
        if not numsamples:
            numsamples = 1
        if p < 0:
            # note: if X is geninvgauss(p, b), then 1/X is geninvgauss(-p, b)
            p = -p
            invert_res = True
        m = self._mode(p, b)

        # determine method to be used following [2]
        ratio_unif = True
        if p >= 1 or b > 1:
            # ratio of uniforms with mode shift below
            mode_shift = True
        elif b >= min(0.5, 2 * np.sqrt(1 - p) / 3):
            # ratio of uniforms without mode shift below
            mode_shift = False
        else:
            # new algorithm in [2]
            ratio_unif = False

        # prepare sampling of rvs
        size1d = tuple(np.atleast_1d(numsamples))
        N = np.prod(size1d)  # number of rvs needed, reshape upon return
        x = np.zeros(N)
        simulated = 0  # number of accepted variates stored in `x` so far

        if ratio_unif:
            # use ratio of uniforms method
            if mode_shift:
                a2 = -2 * (p + 1) / b - m
                a1 = 2 * m * (p - 1) / b - 1
                # find roots of x**3 + a2*x**2 + a1*x + m (Cardano's formula)
                p1 = a1 - a2**2 / 3
                q1 = 2 * a2**3 / 27 - a2 * a1 / 3 + m
                phi = np.arccos(-q1 * np.sqrt(-27 / p1**3) / 2)
                s1 = -np.sqrt(-4 * p1 / 3)
                root1 = s1 * np.cos(phi / 3 + np.pi / 3) - a2 / 3
                root2 = -s1 * np.cos(phi / 3) - a2 / 3
                # root3 = s1 * np.cos(phi / 3 - np.pi / 3) - a2 / 3

                # if g is the quasipdf, rescale: g(x) / g(m) which we can write
                # as exp(log(g(x)) - log(g(m))). This is important
                # since for large values of p and b, g cannot be evaluated.
                # denote the rescaled quasipdf by h
                lm = self._logquasipdf(m, p, b)
                d1 = self._logquasipdf(root1, p, b) - lm
                d2 = self._logquasipdf(root2, p, b) - lm

                # compute the bounding rectangle w.r.t. h. Note that
                # np.exp(0.5*d1) = np.sqrt(g(root1)/g(m)) = np.sqrt(h(root1))
                vmin = (root1 - m) * np.exp(0.5 * d1)
                vmax = (root2 - m) * np.exp(0.5 * d2)
                umax = 1  # umax = sqrt(h(m)) = 1

                def logqpdf(x):
                    return self._logquasipdf(x, p, b) - lm

                # proposals are shifted by the mode: rvs = v / u + c
                c = m
            else:
                # ratio of uniforms without mode shift
                # compute np.sqrt(quasipdf(m))
                umax = np.exp(0.5*self._logquasipdf(m, p, b))
                xplus = ((1 + p) + np.sqrt((1 + p)**2 + b**2))/b
                vmin = 0
                # compute xplus * np.sqrt(quasipdf(xplus))
                vmax = xplus * np.exp(0.5 * self._logquasipdf(xplus, p, b))
                c = 0

                def logqpdf(x):
                    return self._logquasipdf(x, p, b)

            # sanity checks on the bounding rectangle
            if vmin >= vmax:
                raise ValueError("vmin must be smaller than vmax.")
            if umax <= 0:
                raise ValueError("umax must be positive.")

            i = 1  # number of proposal batches drawn so far
            while simulated < N:
                k = N - simulated
                # simulate uniform rvs on [0, umax] and [vmin, vmax]
                u = umax * random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                v = vmin + (vmax - vmin) * v
                rvs = v / u + c
                # rewrite acceptance condition u**2 <= pdf(rvs) by taking logs
                accept = (2*np.log(u) <= logqpdf(rvs))
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

                # give up if nothing at all has been accepted after at
                # least 50000 proposals
                if (simulated == 0) and (i*N >= 50000):
                    msg = ("Not a single random variate could be generated "
                           f"in {i*N} attempts. Sampling does not appear to "
                           "work for the provided parameters.")
                    raise RuntimeError(msg)
                i += 1
        else:
            # use new algorithm in [2]
            # A1, A2, A3 are the areas under the piecewise hat function on
            # the three subdomains handled below; A is their total
            x0 = b / (1 - p)
            xs = np.max((x0, 2 / b))
            k1 = np.exp(self._logquasipdf(m, p, b))
            A1 = k1 * x0
            if x0 < 2 / b:
                k2 = np.exp(-b)
                if p > 0:
                    A2 = k2 * ((2 / b)**p - x0**p) / p
                else:
                    A2 = k2 * np.log(2 / b**2)
            else:
                # the middle subdomain (x0, 2 / b) is empty
                k2, A2 = 0, 0
            k3 = xs**(p - 1)
            A3 = 2 * k3 * np.exp(-xs * b / 2) / b
            A = A1 + A2 + A3

            # [2]: rejection constant is < 2.73; so expected runtime is finite
            while simulated < N:
                k = N - simulated
                h, rvs = np.zeros(k), np.zeros(k)
                # simulate uniform rvs on [x1, x2] and [0, y2]
                u = random_state.uniform(size=k)
                v = A * random_state.uniform(size=k)
                # assign each proposal to one of the three subdomains
                cond1 = v <= A1
                cond2 = np.logical_not(cond1) & (v <= A1 + A2)
                cond3 = np.logical_not(cond1 | cond2)
                # subdomain (0, x0)
                rvs[cond1] = x0 * v[cond1] / A1
                h[cond1] = k1
                # subdomain (x0, 2 / b)
                if p > 0:
                    rvs[cond2] = (x0**p + (v[cond2] - A1) * p / k2)**(1 / p)
                else:
                    rvs[cond2] = b * np.exp((v[cond2] - A1) * np.exp(b))
                h[cond2] = k2 * rvs[cond2]**(p - 1)
                # subdomain (xs, infinity)
                z = np.exp(-xs * b / 2) - b * (v[cond3] - A1 - A2) / (2 * k3)
                rvs[cond3] = -2 / b * np.log(z)
                h[cond3] = k3 * np.exp(-rvs[cond3] * b / 2)
                # apply rejection method
                accept = (np.log(u * h) <= self._logquasipdf(rvs, p, b))
                num_accept = sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

        rvs = np.reshape(x, size1d)
        # undo the sign flip of p applied at the top
        if invert_res:
            rvs = 1 / rvs
        return rvs

    def _mode(self, p, b):
        # distinguish cases to avoid catastrophic cancellation (see [2])
        if p < 1:
            return b / (np.sqrt((p - 1)**2 + b**2) + 1 - p)
        else:
            return (np.sqrt((1 - p)**2 + b**2) - (1 - p)) / b

    def _munp(self, n, p, b):
        # the moment is the ratio kve(p + n, b) / kve(p, b)
        num = sc.kve(p + n, b)
        denom = sc.kve(p, b)
        inf_vals = np.isinf(num) | np.isinf(denom)
        if inf_vals.any():
            msg = ("Infinite values encountered in the moment calculation "
                   "involving scipy.special.kve. Values replaced by NaN to "
                   "avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning, stacklevel=3)
            # NaN wherever either factor is infinite, the ratio elsewhere
            m = np.full_like(num, np.nan, dtype=np.float64)
            m[~inf_vals] = num[~inf_vals] / denom[~inf_vals]
        else:
            m = num / denom
        return m


geninvgauss = geninvgauss_gen(a=0.0, name="geninvgauss")
  4007. class norminvgauss_gen(rv_continuous):
  4008. r"""A Normal Inverse Gaussian continuous random variable.
  4009. %(before_notes)s
  4010. Notes
  4011. -----
  4012. The probability density function for `norminvgauss` is:
  4013. .. math::
  4014. f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
  4015. \exp(\sqrt{a^2 - b^2} + b x)
  4016. where :math:`x` is a real number, the parameter :math:`a` is the tail
  4017. heaviness and :math:`b` is the asymmetry parameter satisfying
  4018. :math:`a > 0` and :math:`|b| <= a`.
  4019. :math:`K_1` is the modified Bessel function of second kind
  4020. (`scipy.special.k1`).
  4021. %(after_notes)s
  4022. A normal inverse Gaussian random variable `Y` with parameters `a` and `b`
  4023. can be expressed as a normal mean-variance mixture:
  4024. ``Y = b * V + sqrt(V) * X`` where `X` is ``norm(0,1)`` and `V` is
  4025. ``invgauss(mu=1/sqrt(a**2 - b**2))``. This representation is used
  4026. to generate random variates.
  4027. Another common parametrization of the distribution (see Equation 2.1 in
  4028. [2]_) is given by the following expression of the pdf:
  4029. .. math::
  4030. g(x, \alpha, \beta, \delta, \mu) =
  4031. \frac{\alpha\delta K_1\left(\alpha\sqrt{\delta^2 + (x - \mu)^2}\right)}
  4032. {\pi \sqrt{\delta^2 + (x - \mu)^2}} \,
  4033. e^{\delta \sqrt{\alpha^2 - \beta^2} + \beta (x - \mu)}
  4034. In SciPy, this corresponds to
  4035. :math:`a=\alpha \delta, b=\beta \delta, \text{loc}=\mu, \text{scale}=\delta`.
  4036. References
  4037. ----------
  4038. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions on
  4039. Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  4040. pp. 151-157, 1978.
  4041. .. [2] O. Barndorff-Nielsen, "Normal Inverse Gaussian Distributions and
  4042. Stochastic Volatility Modelling", Scandinavian Journal of
  4043. Statistics, Vol. 24, pp. 1-13, 1997.
  4044. %(example)s
  4045. """
  4046. _support_mask = rv_continuous._open_support_mask
  4047. def _argcheck(self, a, b):
  4048. return (a > 0) & (np.absolute(b) < a)
  4049. def _shape_info(self):
  4050. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  4051. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  4052. return [ia, ib]
  4053. def _fitstart(self, data):
  4054. # Arbitrary, but the default a = b = 1 is not valid; the distribution
  4055. # requires |b| < a.
  4056. return super()._fitstart(data, args=(1, 0.5))
  4057. def _pdf(self, x, a, b):
  4058. gamma = np.sqrt(a**2 - b**2)
  4059. fac1 = a / np.pi
  4060. sq = np.hypot(1, x) # reduce overflows
  4061. return fac1 * sc.k1e(a * sq) * np.exp(b*x - a*sq + gamma) / sq
  4062. def _sf(self, x, a, b):
  4063. if np.isscalar(x):
  4064. # If x is a scalar, then so are a and b.
  4065. return integrate.quad(self._pdf, x, np.inf, args=(a, b))[0]
  4066. else:
  4067. a = np.atleast_1d(a)
  4068. b = np.atleast_1d(b)
  4069. result = []
  4070. for (x0, a0, b0) in zip(x, a, b):
  4071. result.append(integrate.quad(self._pdf, x0, np.inf,
  4072. args=(a0, b0))[0])
  4073. return np.array(result)
  4074. def _isf(self, q, a, b):
  4075. def _isf_scalar(q, a, b):
  4076. def eq(x, a, b, q):
  4077. # Solve eq(x, a, b, q) = 0 to obtain isf(x, a, b) = q.
  4078. return self._sf(x, a, b) - q
  4079. # Find a bracketing interval for the root.
  4080. # Start at the mean, and grow the length of the interval
  4081. # by 2 each iteration until there is a sign change in eq.
  4082. xm = self.mean(a, b)
  4083. em = eq(xm, a, b, q)
  4084. if em == 0:
  4085. # Unlikely, but might as well check.
  4086. return xm
  4087. if em > 0:
  4088. delta = 1
  4089. left = xm
  4090. right = xm + delta
  4091. while eq(right, a, b, q) > 0:
  4092. delta = 2*delta
  4093. right = xm + delta
  4094. else:
  4095. # em < 0
  4096. delta = 1
  4097. right = xm
  4098. left = xm - delta
  4099. while eq(left, a, b, q) < 0:
  4100. delta = 2*delta
  4101. left = xm - delta
  4102. result = optimize.brentq(eq, left, right, args=(a, b, q),
  4103. xtol=self.xtol)
  4104. return result
  4105. if np.isscalar(q):
  4106. return _isf_scalar(q, a, b)
  4107. else:
  4108. result = []
  4109. for (q0, a0, b0) in zip(q, a, b):
  4110. result.append(_isf_scalar(q0, a0, b0))
  4111. return np.array(result)
  4112. def _rvs(self, a, b, size=None, random_state=None):
  4113. # note: X = b * V + sqrt(V) * X is norminvgaus(a,b) if X is standard
  4114. # normal and V is invgauss(mu=1/sqrt(a**2 - b**2))
  4115. gamma = np.sqrt(a**2 - b**2)
  4116. ig = invgauss.rvs(mu=1/gamma, size=size, random_state=random_state)
  4117. return b * ig + np.sqrt(ig) * norm.rvs(size=size,
  4118. random_state=random_state)
  4119. def _stats(self, a, b):
  4120. gamma = np.sqrt(a**2 - b**2)
  4121. mean = b / gamma
  4122. variance = a**2 / gamma**3
  4123. skewness = 3.0 * b / (a * np.sqrt(gamma))
  4124. kurtosis = 3.0 * (1 + 4 * b**2 / a**2) / gamma
  4125. return mean, variance, skewness, kurtosis
  4126. norminvgauss = norminvgauss_gen(name="norminvgauss")
  class invweibull_gen(rv_continuous):
      """An inverted Weibull continuous random variable.

      This distribution is also known as the Fréchet distribution or the
      type II extreme value distribution.

      %(before_notes)s

      Notes
      -----
      The probability density function for `invweibull` is:

      .. math::

          f(x, c) = c x^{-c-1} \\exp(-x^{-c})

      for :math:`x > 0`, :math:`c > 0`.

      `invweibull` takes ``c`` as a shape parameter for :math:`c`.

      %(after_notes)s

      References
      ----------
      F.R.S. de Gusmao, E.M.M Ortega and G.M. Cordeiro, "The generalized inverse
      Weibull distribution", Stat. Papers, vol. 52, pp. 591-619, 2011.

      %(example)s

      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

      def _pdf(self, x, c):
          # invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
          xc1 = np.power(x, -c - 1.0)
          xc2 = np.power(x, -c)
          xc2 = np.exp(-xc2)
          return c * xc1 * xc2

      def _cdf(self, x, c):
          # cdf(x, c) = exp(-x**(-c))
          xc1 = np.power(x, -c)
          return np.exp(-xc1)

      def _sf(self, x, c):
          # 1 - exp(-x**(-c)), written with expm1 to avoid cancellation
          # when x**(-c) is small.
          return -np.expm1(-x**-c)

      def _ppf(self, q, c):
          return np.power(-np.log(q), -1.0/c)

      def _isf(self, p, c):
          # Inverse of `_sf`; log1p(-p) keeps precision for small p.
          return (-np.log1p(-p))**(-1/c)

      def _munp(self, n, c):
          # Substituting t = x**(-c) gives
          #     E[X**n] = integral_0^inf t**(-n/c) * exp(-t) dt
          #             = Gamma(1 - n/c),
          # which converges only for n < c.  For n >= c the moment is
          # infinite; evaluating Gamma at a non-positive argument there
          # would return a finite (possibly negative) number instead.
          with np.errstate(divide='ignore', invalid='ignore'):
              finite_moment = sc.gamma(1 - n / c)
          # `[()]` turns a 0-d result back into a scalar, as before.
          return np.where(n < c, finite_moment, np.inf)[()]

      def _entropy(self, c):
          return 1+_EULER + _EULER / c - np.log(c)

      def _fitstart(self, data, args=None):
          # invweibull requires c > 1 for the first moment to exist, so use 2.0
          args = (2.0,) if args is None else args
          return super()._fitstart(data, args=args)


  invweibull = invweibull_gen(a=0, name='invweibull')
  4173. class jf_skew_t_gen(rv_continuous):
  4174. r"""Jones and Faddy skew-t distribution.
  4175. %(before_notes)s
  4176. Notes
  4177. -----
  4178. The probability density function for `jf_skew_t` is:
  4179. .. math::
  4180. f(x; a, b) = C_{a,b}^{-1}
  4181. \left(1+\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{a+1/2}
  4182. \left(1-\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{b+1/2}
  4183. for real numbers :math:`a>0` and :math:`b>0`, where
  4184. :math:`C_{a,b} = 2^{a+b-1}B(a,b)(a+b)^{1/2}`, and :math:`B` denotes the
  4185. beta function (`scipy.special.beta`).
  4186. When :math:`a<b`, the distribution is negatively skewed, and when
  4187. :math:`a>b`, the distribution is positively skewed. If :math:`a=b`, then
  4188. we recover the `t` distribution with :math:`2a` degrees of freedom.
  4189. `jf_skew_t` takes :math:`a` and :math:`b` as shape parameters.
  4190. %(after_notes)s
  4191. References
  4192. ----------
  4193. .. [1] M.C. Jones and M.J. Faddy. "A skew extension of the t distribution,
  4194. with applications" *Journal of the Royal Statistical Society*.
  4195. Series B (Statistical Methodology) 65, no. 1 (2003): 159-174.
  4196. :doi:`10.1111/1467-9868.00378`
  4197. %(example)s
  4198. """
  4199. def _shape_info(self):
  4200. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  4201. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  4202. return [ia, ib]
  4203. def _pdf(self, x, a, b):
  4204. c = 2 ** (a + b - 1) * sc.beta(a, b) * np.sqrt(a + b)
  4205. d1 = (1 + x / np.sqrt(a + b + x ** 2)) ** (a + 0.5)
  4206. d2 = (1 - x / np.sqrt(a + b + x ** 2)) ** (b + 0.5)
  4207. return d1 * d2 / c
  4208. def _rvs(self, a, b, size=None, random_state=None):
  4209. d1 = random_state.beta(a, b, size)
  4210. d2 = (2 * d1 - 1) * np.sqrt(a + b)
  4211. d3 = 2 * np.sqrt(d1 * (1 - d1))
  4212. return d2 / d3
  4213. def _cdf(self, x, a, b):
  4214. y = (1 + x / np.sqrt(a + b + x ** 2)) * 0.5
  4215. return sc.betainc(a, b, y)
  4216. def _sf(self, x, a, b):
  4217. y = (1 + x / np.sqrt(a + b + x ** 2)) * 0.5
  4218. return sc.betaincc(a, b, y)
  4219. def _ppf(self, q, a, b):
  4220. d1 = beta.ppf(q, a, b)
  4221. d2 = (2 * d1 - 1) * np.sqrt(a + b)
  4222. d3 = 2 * np.sqrt(d1 * (1 - d1))
  4223. return d2 / d3
  4224. def _munp(self, n, a, b):
  4225. """Returns the n-th moment(s) where all the following hold:
  4226. - n >= 0
  4227. - a > n / 2
  4228. - b > n / 2
  4229. The result is np.nan in all other cases.
  4230. """
  4231. def nth_moment(n_k, a_k, b_k):
  4232. """Computes E[T^(n_k)] where T is skew-t distributed with
  4233. parameters a_k and b_k.
  4234. """
  4235. num = (a_k + b_k) ** (0.5 * n_k)
  4236. denom = 2 ** n_k * sc.beta(a_k, b_k)
  4237. indices = np.arange(n_k + 1)
  4238. sgn = np.where(indices % 2 > 0, -1, 1)
  4239. d = sc.beta(a_k + 0.5 * n_k - indices, b_k - 0.5 * n_k + indices)
  4240. sum_terms = sc.comb(n_k, indices) * sgn * d
  4241. return num / denom * sum_terms.sum()
  4242. nth_moment_valid = (a > 0.5 * n) & (b > 0.5 * n) & (n >= 0)
  4243. return xpx.apply_where(
  4244. nth_moment_valid,
  4245. (n, a, b),
  4246. np.vectorize(nth_moment, otypes=[np.float64]),
  4247. fill_value=np.nan,
  4248. )
  4249. jf_skew_t = jf_skew_t_gen(name='jf_skew_t')
  class johnsonsb_gen(rv_continuous):
      r"""A Johnson SB continuous random variable.

      %(before_notes)s

      See Also
      --------
      johnsonsu

      Notes
      -----
      The probability density function for `johnsonsb` is:

      .. math::

          f(x, a, b) = \frac{b}{x(1-x)} \phi(a + b \log \frac{x}{1-x} )

      where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`
      and :math:`x \in [0,1]`.  :math:`\phi` is the pdf of the normal
      distribution.

      `johnsonsb` takes :math:`a` and :math:`b` as shape parameters.

      %(after_notes)s

      %(example)s

      """
      _support_mask = rv_continuous._open_support_mask

      def _argcheck(self, a, b):
          # `a == a` is False only for NaN, so any non-NaN `a` is accepted.
          a_is_valid = (a == a)
          return (b > 0) & a_is_valid

      def _shape_info(self):
          return [
              _ShapeInfo("a", False, (-np.inf, np.inf), (False, False)),
              _ShapeInfo("b", False, (0, np.inf), (False, False)),
          ]

      def _pdf(self, x, a, b):
          # johnsonsb.pdf(x, a, b) = b / (x*(1-x)) * phi(a + b * log(x/(1-x)))
          z = a + b*sc.logit(x)
          density_at_z = _norm_pdf(z)
          return b*1.0/(x*(1-x))*density_at_z

      def _cdf(self, x, a, b):
          z = a + b*sc.logit(x)
          return _norm_cdf(z)

      def _ppf(self, q, a, b):
          z = _norm_ppf(q)
          return sc.expit(1.0 / b * (z - a))

      def _sf(self, x, a, b):
          z = a + b*sc.logit(x)
          return _norm_sf(z)

      def _isf(self, q, a, b):
          z = _norm_isf(q)
          return sc.expit(1.0 / b * (z - a))


  johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')
  class johnsonsu_gen(rv_continuous):
      r"""A Johnson SU continuous random variable.

      %(before_notes)s

      See Also
      --------
      johnsonsb

      Notes
      -----
      The probability density function for `johnsonsu` is:

      .. math::

          f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
                       \phi(a + b \log(x + \sqrt{x^2 + 1}))

      where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
      :math:`\phi` is the pdf of the normal distribution.

      `johnsonsu` takes :math:`a` and :math:`b` as shape parameters.

      The first four central moments are calculated according to the formulas
      in [1]_.

      %(after_notes)s

      References
      ----------
      .. [1] Taylor Enterprises. "Johnson Family of Distributions".
         https://variation.com/wp-content/distribution_analyzer_help/hs126.htm

      %(example)s

      """
      def _argcheck(self, a, b):
          # `a == a` is False only for NaN, so any non-NaN `a` is accepted.
          a_is_valid = (a == a)
          return (b > 0) & a_is_valid

      def _shape_info(self):
          return [
              _ShapeInfo("a", False, (-np.inf, np.inf), (False, False)),
              _ShapeInfo("b", False, (0, np.inf), (False, False)),
          ]

      def _pdf(self, x, a, b):
          # johnsonsu.pdf(x, a, b) = b / sqrt(x**2 + 1) *
          #                          phi(a + b * log(x + sqrt(x**2 + 1)))
          # where log(x + sqrt(x**2 + 1)) is arcsinh(x).
          density_at_z = _norm_pdf(a + b * np.arcsinh(x))
          x_sq = x*x
          return b*1.0/np.sqrt(x_sq+1.0)*density_at_z

      def _cdf(self, x, a, b):
          z = a + b * np.arcsinh(x)
          return _norm_cdf(z)

      def _ppf(self, q, a, b):
          z = _norm_ppf(q)
          return np.sinh((z - a) / b)

      def _sf(self, x, a, b):
          z = a + b * np.arcsinh(x)
          return _norm_sf(z)

      def _isf(self, x, a, b):
          # Here `x` is the survival probability to invert.
          z = _norm_isf(x)
          return np.sinh((z - a) / b)

      def _stats(self, a, b, moments='mv'):
          # Naive closed-form implementation added to address gh-18071.
          # https://variation.com/wp-content/distribution_analyzer_help/hs126.htm
          # Numerical improvements left to future enhancements.
          inv_b_sq = b**-2.
          w = np.exp(inv_b_sq)
          omega = a / b

          # Only the requested statistics are computed; the rest stay None.
          mean = variance = skew = kurt = None

          if 'm' in moments:
              mean = -w**0.5 * np.sinh(omega)

          if 'v' in moments:
              variance = 0.5*sc.expm1(inv_b_sq)*(w*np.cosh(2*omega) + 1)

          if 's' in moments:
              lead = w**.5 * sc.expm1(inv_b_sq)**0.5
              sinh_1 = 3*np.sinh(omega)
              sinh_3 = w * (w + 2) * np.sinh(3*omega)
              scale = np.sqrt(2) * (1 + w * np.cosh(2*omega))**(3/2)
              skew = -lead * (sinh_1 + sinh_3) / scale

          if 'k' in moments:
              const = 3 + 6*w
              cosh_2 = 4*w**2 * (w + 2) * np.cosh(2*omega)
              cosh_4 = w**2 * np.cosh(4*omega)
              poly = -3 + 3*w**2 + 2*w**3 + w**4
              scale = 2*(1 + w*np.cosh(2*omega))**2
              kurt = (const + cosh_2 + cosh_4*poly) / scale - 3

          return mean, variance, skew, kurt


  johnsonsu = johnsonsu_gen(name='johnsonsu')
  4359. class landau_gen(rv_continuous):
  4360. r"""A Landau continuous random variable.
  4361. %(before_notes)s
  4362. Notes
  4363. -----
  4364. The probability density function for `landau` ([1]_, [2]_) is:
  4365. .. math::
  4366. f(x) = \frac{1}{\pi}\int_0^\infty \exp(-t \log t - xt)\sin(\pi t) dt
  4367. for a real number :math:`x`.
  4368. %(after_notes)s
  4369. Often (e.g. [2]_), the Landau distribution is parameterized in terms of a
  4370. location parameter :math:`\mu` and scale parameter :math:`c`, the latter of
  4371. which *also* introduces a location shift. If ``mu`` and ``c`` are used to
  4372. represent these parameters, this corresponds with SciPy's parameterization
  4373. with ``loc = mu + 2*c / np.pi * np.log(c)`` and ``scale = c``.
  4374. This distribution uses routines from the Boost Math C++ library for
  4375. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  4376. methods. [1]_
  4377. References
  4378. ----------
  4379. .. [1] Landau, L. (1944). "On the energy loss of fast particles by
  4380. ionization". J. Phys. (USSR). 8: 201.
  4381. .. [2] "Landau Distribution", Wikipedia,
  4382. https://en.wikipedia.org/wiki/Landau_distribution
  4383. .. [3] Chambers, J. M., Mallows, C. L., & Stuck, B. (1976).
  4384. "A method for simulating stable random variables."
  4385. Journal of the American Statistical Association, 71(354), 340-344.
  4386. .. [4] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  4387. .. [5] Yoshimura, T. "Numerical Evaluation and High Precision Approximation
  4388. Formula for Landau Distribution".
  4389. :doi:`10.36227/techrxiv.171822215.53612870/v2`
  4390. %(example)s
  4391. """
  4392. def _shape_info(self):
  4393. return []
  4394. def _entropy(self):
  4395. # Computed with mpmath - see gh-19145
  4396. return 2.37263644000448182
  4397. def _pdf(self, x):
  4398. return scu._landau_pdf(x, 0, 1)
  4399. def _cdf(self, x):
  4400. return scu._landau_cdf(x, 0, 1)
  4401. def _sf(self, x):
  4402. return scu._landau_sf(x, 0, 1)
  4403. def _ppf(self, p):
  4404. return scu._landau_ppf(p, 0, 1)
  4405. def _isf(self, p):
  4406. return scu._landau_isf(p, 0, 1)
  4407. def _stats(self):
  4408. return np.nan, np.nan, np.nan, np.nan
  4409. def _munp(self, n):
  4410. return np.nan if n > 0 else 1
  4411. def _fitstart(self, data, args=None):
  4412. # Initialize ML guesses using quartiles instead of moments.
  4413. if isinstance(data, CensoredData):
  4414. data = data._uncensor()
  4415. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  4416. return p50, (p75 - p25)/2
  4417. def _rvs(self, size=None, random_state=None):
  4418. # Method from https://www.jstor.org/stable/2285309 Eq. 2.4
  4419. pi_2 = np.pi / 2
  4420. U = random_state.uniform(-np.pi / 2, np.pi / 2, size=size)
  4421. W = random_state.standard_exponential(size=size)
  4422. S = 2 / np.pi * ((pi_2 + U) * np.tan(U)
  4423. - np.log((pi_2 * W * np.cos(U)) / (pi_2 + U)))
  4424. return S
  4425. landau = landau_gen(name='landau')
  class laplace_gen(rv_continuous):
      r"""A Laplace continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `laplace` is

      .. math::

          f(x) = \frac{1}{2} \exp(-|x|)

      for a real number :math:`x`.

      %(after_notes)s

      %(example)s

      """
      def _shape_info(self):
          # No shape parameters.
          return []

      def _rvs(self, size=None, random_state=None):
          return random_state.laplace(0, 1, size=size)

      def _pdf(self, x):
          # laplace.pdf(x) = 1/2 * exp(-abs(x))
          return 0.5*np.exp(-abs(x))

      def _cdf(self, x):
          # Both branches are evaluated for every x, so the branch that is
          # not selected may overflow; that overflow is silenced.
          with np.errstate(over='ignore'):
              upper_branch = 1.0 - 0.5*np.exp(-x)
              lower_branch = 0.5*np.exp(x)
              return np.where(x > 0, upper_branch, lower_branch)

      def _sf(self, x):
          # By symmetry...
          return self._cdf(-x)

      def _ppf(self, q):
          upper_branch = -np.log(2*(1-q))
          lower_branch = np.log(2*q)
          return np.where(q > 0.5, upper_branch, lower_branch)

      def _isf(self, q):
          # By symmetry...
          return -self._ppf(q)

      def _stats(self):
          return 0, 2, 0, 3

      def _entropy(self):
          return np.log(2)+1

      @_call_super_mom
      @replace_notes_in_docstring(rv_continuous, notes="""\
          This function uses explicit formulas for the maximum likelihood
          estimation of the Laplace distribution parameters, so the keyword
          arguments `loc`, `scale`, and `optimizer` are ignored.\n\n""")
      def fit(self, data, *args, **kwds):
          data, floc, fscale = _check_fit_input_parameters(self, data,
                                                           args, kwds)

          # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
          # and Peacock (2000), Page 124

          # Location: the sample median, unless the caller fixed it.
          loc_hat = np.median(data) if floc is None else floc

          # Scale: mean absolute deviation about the location, unless fixed.
          if fscale is None:
              scale_hat = (np.sum(np.abs(data - loc_hat))) / len(data)
          else:
              scale_hat = fscale

          return loc_hat, scale_hat


  laplace = laplace_gen(name='laplace')
  class laplace_asymmetric_gen(rv_continuous):
      r"""An asymmetric Laplace continuous random variable.

      %(before_notes)s

      See Also
      --------
      laplace : Laplace distribution

      Notes
      -----
      The probability density function for `laplace_asymmetric` is

      .. math::

         f(x, \kappa) &= \frac{1}{\kappa+\kappa^{-1}}\exp(-x\kappa),\quad x\ge0\\
                      &= \frac{1}{\kappa+\kappa^{-1}}\exp(x/\kappa),\quad x<0\\

      for :math:`-\infty < x < \infty`, :math:`\kappa > 0`.

      `laplace_asymmetric` takes ``kappa`` as a shape parameter for
      :math:`\kappa`. For :math:`\kappa = 1`, it is identical to a
      Laplace distribution.

      %(after_notes)s

      Note that the scale parameter of some references is the reciprocal of
      SciPy's ``scale``. For example, :math:`\lambda = 1/2` in the
      parameterization of [1]_ is equivalent to ``scale = 2`` with
      `laplace_asymmetric`.

      References
      ----------
      .. [1] "Asymmetric Laplace distribution", Wikipedia
              https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution

      .. [2] Kozubowski TJ and Podgórski K. A Multivariate and
             Asymmetric Generalization of Laplace Distribution,
             Computational Statistics 15, 531--540 (2000).
             :doi:`10.1007/PL00022717`

      %(example)s

      """
      def _shape_info(self):
          return [_ShapeInfo("kappa", False, (0, np.inf), (False, False))]

      def _pdf(self, x, kappa):
          return np.exp(self._logpdf(x, kappa))

      def _logpdf(self, x, kappa):
          inv_kappa = 1/kappa
          # Slope of the log-density: -kappa for x >= 0, 1/kappa for x < 0.
          slope = np.where(x >= 0, -kappa, inv_kappa)
          return x * slope - np.log(kappa+inv_kappa)

      def _cdf(self, x, kappa):
          inv_kappa = 1/kappa
          total = kappa+inv_kappa
          right = 1 - np.exp(-x*kappa)*(inv_kappa/total)
          left = np.exp(x*inv_kappa)*(kappa/total)
          return np.where(x >= 0, right, left)

      def _sf(self, x, kappa):
          inv_kappa = 1/kappa
          total = kappa+inv_kappa
          right = np.exp(-x*kappa)*(inv_kappa/total)
          left = 1 - np.exp(x*inv_kappa)*(kappa/total)
          return np.where(x >= 0, right, left)

      def _ppf(self, q, kappa):
          inv_kappa = 1/kappa
          total = kappa+inv_kappa
          # kappa/total is the cdf value at x = 0, where the branches meet.
          right = -np.log((1 - q)*total*kappa)*inv_kappa
          left = np.log(q*total/kappa)*kappa
          return np.where(q >= kappa/total, right, left)

      def _isf(self, q, kappa):
          inv_kappa = 1/kappa
          total = kappa+inv_kappa
          # inv_kappa/total is the sf value at x = 0, where the branches meet.
          right = -np.log(q*total*kappa)*inv_kappa
          left = np.log((1 - q)*total/kappa)*kappa
          return np.where(q <= inv_kappa/total, right, left)

      def _stats(self, kappa):
          inv_kappa = 1/kappa
          kappa_4 = np.power(kappa, 4)
          mean = inv_kappa - kappa
          variance = inv_kappa*inv_kappa + kappa*kappa
          skew = 2.0*(1-np.power(kappa, 6))/np.power(1+kappa_4, 1.5)
          kurt = 6.0*(1+np.power(kappa, 8))/np.power(1+kappa_4, 2)
          return mean, variance, skew, kurt

      def _entropy(self, kappa):
          return 1 + np.log(kappa+1/kappa)


  laplace_asymmetric = laplace_asymmetric_gen(name='laplace_asymmetric')
  4550. def _check_fit_input_parameters(dist, data, args, kwds):
  4551. if not isinstance(data, CensoredData):
  4552. data = np.asarray(data)
  4553. floc = kwds.get('floc', None)
  4554. fscale = kwds.get('fscale', None)
  4555. num_shapes = len(dist.shapes.split(",")) if dist.shapes else 0
  4556. fshape_keys = []
  4557. fshapes = []
  4558. # user has many options for fixing the shape, so here we standardize it
  4559. # into 'f' + the number of the shape.
  4560. # Adapted from `_reduce_func` in `_distn_infrastructure.py`:
  4561. if dist.shapes:
  4562. shapes = dist.shapes.replace(',', ' ').split()
  4563. for j, s in enumerate(shapes):
  4564. key = 'f' + str(j)
  4565. names = [key, 'f' + s, 'fix_' + s]
  4566. val = _get_fixed_fit_value(kwds, names)
  4567. fshape_keys.append(key)
  4568. fshapes.append(val)
  4569. if val is not None:
  4570. kwds[key] = val
  4571. # determine if there are any unknown arguments in kwds
  4572. known_keys = {'loc', 'scale', 'optimizer', 'method',
  4573. 'floc', 'fscale', *fshape_keys}
  4574. unknown_keys = set(kwds).difference(known_keys)
  4575. if unknown_keys:
  4576. raise TypeError(f"Unknown keyword arguments: {unknown_keys}.")
  4577. if len(args) > num_shapes:
  4578. raise TypeError("Too many positional arguments.")
  4579. if None not in {floc, fscale, *fshapes}:
  4580. # This check is for consistency with `rv_continuous.fit`.
  4581. # Without this check, this function would just return the
  4582. # parameters that were given.
  4583. raise RuntimeError("All parameters fixed. There is nothing to "
  4584. "optimize.")
  4585. uncensored = data._uncensor() if isinstance(data, CensoredData) else data
  4586. if not np.isfinite(uncensored).all():
  4587. raise ValueError("The data contains non-finite values.")
  4588. return (data, *fshapes, floc, fscale)
  class levy_gen(rv_continuous):
      r"""A Levy continuous random variable.

      %(before_notes)s

      See Also
      --------
      levy_stable, levy_l

      Notes
      -----
      The probability density function for `levy` is:

      .. math::

          f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp\left(-\frac{1}{2x}\right)

      for :math:`x > 0`.

      This is the same as the Levy-stable distribution with :math:`a=1/2` and
      :math:`b=1`.

      %(after_notes)s

      Examples
      --------
      >>> import numpy as np
      >>> from scipy.stats import levy
      >>> import matplotlib.pyplot as plt
      >>> fig, ax = plt.subplots(1, 1)

      Calculate the first four moments:

      >>> mean, var, skew, kurt = levy.stats(moments='mvsk')

      Display the probability density function (``pdf``):

      >>> # `levy` is very heavy-tailed.
      >>> # To show a nice plot, let's cut off the upper 40 percent.
      >>> a, b = levy.ppf(0), levy.ppf(0.6)
      >>> x = np.linspace(a, b, 100)
      >>> ax.plot(x, levy.pdf(x),
      ...        'r-', lw=5, alpha=0.6, label='levy pdf')

      Alternatively, the distribution object can be called (as a function)
      to fix the shape, location and scale parameters. This returns a "frozen"
      RV object holding the given parameters fixed.

      Freeze the distribution and display the frozen ``pdf``:

      >>> rv = levy()
      >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

      Check accuracy of ``cdf`` and ``ppf``:

      >>> vals = levy.ppf([0.001, 0.5, 0.999])
      >>> np.allclose([0.001, 0.5, 0.999], levy.cdf(vals))
      True

      Generate random numbers:

      >>> r = levy.rvs(size=1000)

      And compare the histogram:

      >>> # manual binning to ignore the tail
      >>> bins = np.concatenate((np.linspace(a, b, 20), [np.max(r)]))
      >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
      >>> ax.set_xlim([x[0], x[-1]])
      >>> ax.legend(loc='best', frameon=False)
      >>> plt.show()

      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          # No shape parameters.
          return []

      def _pdf(self, x):
          # levy.pdf(x) = 1 / (x * sqrt(2*pi*x)) * exp(-1/(2*x))
          root = np.sqrt(2*np.pi*x)
          decay = np.exp(-1/(2*x))
          return 1 / root / x * decay

      def _cdf(self, x):
          # Equivalent to 2*norm.sf(np.sqrt(1/x))
          arg = np.sqrt(0.5 / x)
          return sc.erfc(arg)

      def _sf(self, x):
          arg = np.sqrt(0.5 / x)
          return sc.erf(arg)

      def _ppf(self, q):
          # Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
          z = _norm_isf(q/2)
          return 1.0 / (z * z)

      def _isf(self, p):
          # Inverts `_sf`: erf(sqrt(0.5/x)) = p.
          return 1/(2*sc.erfinv(p)**2)

      def _stats(self):
          return np.inf, np.inf, np.nan, np.nan


  levy = levy_gen(a=0.0, name="levy")
  class levy_l_gen(rv_continuous):
      r"""A left-skewed Levy continuous random variable.

      %(before_notes)s

      See Also
      --------
      levy, levy_stable

      Notes
      -----
      The probability density function for `levy_l` is:

      .. math::
          f(x) = \frac{1}{|x| \sqrt{2\pi |x|}} \exp{ \left(-\frac{1}{2|x|} \right)}

      for :math:`x < 0`.

      This is the same as the Levy-stable distribution with :math:`a=1/2` and
      :math:`b=-1`.

      %(after_notes)s

      Examples
      --------
      >>> import numpy as np
      >>> from scipy.stats import levy_l
      >>> import matplotlib.pyplot as plt
      >>> fig, ax = plt.subplots(1, 1)

      Calculate the first four moments:

      >>> mean, var, skew, kurt = levy_l.stats(moments='mvsk')

      Display the probability density function (``pdf``):

      >>> # `levy_l` is very heavy-tailed.
      >>> # To show a nice plot, let's cut off the lower 40 percent.
      >>> a, b = levy_l.ppf(0.4), levy_l.ppf(1)
      >>> x = np.linspace(a, b, 100)
      >>> ax.plot(x, levy_l.pdf(x),
      ...        'r-', lw=5, alpha=0.6, label='levy_l pdf')

      Alternatively, the distribution object can be called (as a function)
      to fix the shape, location and scale parameters. This returns a "frozen"
      RV object holding the given parameters fixed.

      Freeze the distribution and display the frozen ``pdf``:

      >>> rv = levy_l()
      >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

      Check accuracy of ``cdf`` and ``ppf``:

      >>> vals = levy_l.ppf([0.001, 0.5, 0.999])
      >>> np.allclose([0.001, 0.5, 0.999], levy_l.cdf(vals))
      True

      Generate random numbers:

      >>> r = levy_l.rvs(size=1000)

      And compare the histogram:

      >>> # manual binning to ignore the tail
      >>> bins = np.concatenate(([np.min(r)], np.linspace(a, b, 20)))
      >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
      >>> ax.set_xlim([x[0], x[-1]])
      >>> ax.legend(loc='best', frameon=False)
      >>> plt.show()

      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          # No shape parameters.
          return []

      def _pdf(self, x):
          # levy_l.pdf(x) = 1 / (abs(x) * sqrt(2*pi*abs(x))) * exp(-1/(2*abs(x)))
          ax = abs(x)
          return 1/np.sqrt(2*np.pi*ax)/ax*np.exp(-1/(2*ax))

      def _cdf(self, x):
          ax = abs(x)
          return 2 * _norm_cdf(1 / np.sqrt(ax)) - 1

      def _sf(self, x):
          ax = abs(x)
          return 2 * _norm_sf(1 / np.sqrt(ax))

      def _ppf(self, q):
          # Inverts `_cdf`: 2*Phi(1/sqrt(|x|)) - 1 = q, with x < 0.
          val = _norm_ppf((q + 1.0) / 2)
          return -1.0 / (val * val)

      def _isf(self, p):
          # Inverts `_sf`: 2*norm_sf(1/sqrt(|x|)) = p, with x < 0.
          return -1/_norm_isf(p/2)**2

      def _stats(self):
          # All of the probability mass lies on x < 0 and the left tail is
          # too heavy for the mean to be finite, so the mean is -inf (the
          # mirror image of `levy`, whose mean is +inf), not +inf.
          return -np.inf, np.inf, np.nan, np.nan


  levy_l = levy_l_gen(b=0.0, name="levy_l")
  class logistic_gen(rv_continuous):
      r"""A logistic (or Sech-squared) continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `logistic` is:

      .. math::

          f(x) = \frac{\exp(-x)}
                      {(1+\exp(-x))^2}

      `logistic` is a special case of `genlogistic` with ``c=1``.

      Remark that the survival function (``logistic.sf``) is equal to the
      Fermi-Dirac distribution describing fermionic statistics.

      %(after_notes)s

      %(example)s

      """
      def _shape_info(self):
          # No shape parameters.
          return []

      def _rvs(self, size=None, random_state=None):
          return random_state.logistic(size=size)

      def _pdf(self, x):
          # logistic.pdf(x) = exp(-x) / (1+exp(-x))**2
          return np.exp(self._logpdf(x))

      def _logpdf(self, x):
          # Evaluate at -|x| so the exponential never receives a positive
          # argument.
          neg_abs = -np.abs(x)
          return neg_abs - 2. * sc.log1p(np.exp(neg_abs))

      def _cdf(self, x):
          return sc.expit(x)

      def _logcdf(self, x):
          return sc.log_expit(x)

      def _ppf(self, q):
          return sc.logit(q)

      def _sf(self, x):
          return sc.expit(-x)

      def _logsf(self, x):
          return sc.log_expit(-x)

      def _isf(self, q):
          return -sc.logit(q)

      def _stats(self):
          return 0, np.pi*np.pi/3.0, 0, 6.0/5.0

      def _entropy(self):
          # https://en.wikipedia.org/wiki/Logistic_distribution
          return 2.0

      @_call_super_mom
      @inherit_docstring_from(rv_continuous)
      def fit(self, data, *args, **kwds):
          # `superfit=True` bypasses this override entirely.
          if kwds.pop('superfit', False):
              return super().fit(data, *args, **kwds)

          data, floc, fscale = _check_fit_input_parameters(self, data,
                                                           args, kwds)
          n_obs = len(data)

          # Start from the rv_continuous guesses; user-provided guesses
          # take precedence over them.
          start_loc, start_scale = self._fitstart(data)
          start_loc = kwds.get('loc', start_loc)
          start_scale = kwds.get('scale', start_scale)

          # The maximum likelihood estimates of the location and scale are
          # roots of the two equations below.
          # Source: Statistical Distributions, 3rd Edition. Evans, Hastings, and
          # Peacock (2000), Page 130

          def score_loc(loc, scale=fscale):
              z = (data - loc) / scale
              return np.sum(sc.expit(z)) - n_obs/2

          def score_scale(scale, loc=floc):
              z = (data - loc) / scale
              return np.sum(z*np.tanh(z/2)) - n_obs

          def score_both(params):
              loc, scale = params
              return score_loc(loc, scale), score_scale(scale, loc)

          scale_is_fixed = fscale is not None
          loc_is_fixed = floc is not None

          if scale_is_fixed and not loc_is_fixed:
              res = optimize.root(score_loc, (start_loc,))
              loc = res.x[0]
              scale = fscale
          elif loc_is_fixed and not scale_is_fixed:
              res = optimize.root(score_scale, (start_scale,))
              scale = res.x[0]
              loc = floc
          else:
              res = optimize.root(score_both, (start_loc, start_scale))
              loc, scale = res.x

          # Note: gh-18176 reported data for which the reported MLE had
          # `scale < 0`. To fix the bug, we return abs(scale). This is OK because
          # `score_scale` and `score_loc` are even and odd functions of `scale`,
          # respectively, so if `-scale` is a solution, so is `scale`.
          scale = abs(scale)

          # Fall back to the generic fit if the root finder did not succeed.
          if not res.success:
              return super().fit(data, *args, **kwds)
          return loc, scale


  logistic = logistic_gen(name='logistic')
  4816. class loggamma_gen(rv_continuous):
  4817. r"""A log gamma continuous random variable.
  4818. %(before_notes)s
  4819. Notes
  4820. -----
  4821. The probability density function for `loggamma` is:
  4822. .. math::
  4823. f(x, c) = \frac{\exp(c x - \exp(x))}
  4824. {\Gamma(c)}
  4825. for all :math:`x, c > 0`. Here, :math:`\Gamma` is the
  4826. gamma function (`scipy.special.gamma`).
  4827. `loggamma` takes ``c`` as a shape parameter for :math:`c`.
  4828. %(after_notes)s
  4829. %(example)s
  4830. """
  4831. def _shape_info(self):
  4832. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  4833. def _rvs(self, c, size=None, random_state=None):
  4834. # Use the property of the gamma distribution Gamma(c)
  4835. # Gamma(c) ~ Gamma(c + 1)*U**(1/c),
  4836. # where U is uniform on [0, 1]. (See, e.g.,
  4837. # G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
  4838. # variables", https://doi.org/10.1145/358407.358414)
  4839. # So
  4840. # log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
  4841. # Generating a sample with this formulation is a bit slower
  4842. # than the more obvious log(Gamma(c)), but it avoids loss
  4843. # of precision when c << 1.
  4844. return (np.log(random_state.gamma(c + 1, size=size))
  4845. + np.log(random_state.uniform(size=size))/c)
  4846. def _pdf(self, x, c):
  4847. # loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
  4848. return np.exp(c*x-np.exp(x)-sc.gammaln(c))
  4849. def _logpdf(self, x, c):
  4850. return c*x - np.exp(x) - sc.gammaln(c)
  4851. def _cdf(self, x, c):
  4852. # This function is gammainc(c, exp(x)), where gammainc(c, z) is
  4853. # the regularized incomplete gamma function.
  4854. # The first term in a series expansion of gamminc(c, z) is
  4855. # z**c/Gamma(c+1); see 6.5.29 of Abramowitz & Stegun (and refer
  4856. # back to 6.5.1, 6.5.2 and 6.5.4 for the relevant notation).
  4857. # This can also be found in the wikipedia article
  4858. # https://en.wikipedia.org/wiki/Incomplete_gamma_function.
  4859. # Here we use that formula when x is sufficiently negative that
  4860. # exp(x) will result in subnormal numbers and lose precision.
  4861. # We evaluate the log of the expression first to allow the possible
  4862. # cancellation of the terms in the division, and then exponentiate.
  4863. # That is,
  4864. # exp(x)**c/Gamma(c+1) = exp(log(exp(x)**c/Gamma(c+1)))
  4865. # = exp(c*x - gammaln(c+1))
  4866. return xpx.apply_where(
  4867. x < _LOGXMIN, (x, c),
  4868. lambda x, c: np.exp(c*x - sc.gammaln(c+1)),
  4869. lambda x, c: sc.gammainc(c, np.exp(x)))
  4870. def _ppf(self, q, c):
  4871. # The expression used when g < _XMIN inverts the one term expansion
  4872. # given in the comments of _cdf().
  4873. g = sc.gammaincinv(c, q)
  4874. return xpx.apply_where(
  4875. g < _XMIN, (g, q, c),
  4876. lambda g, q, c: (np.log(q) + sc.gammaln(c+1))/c,
  4877. lambda g, q, c: np.log(g))
  4878. def _sf(self, x, c):
  4879. # See the comments for _cdf() for how x < _LOGXMIN is handled.
  4880. return xpx.apply_where(
  4881. x < _LOGXMIN, (x, c),
  4882. lambda x, c: -np.expm1(c*x - sc.gammaln(c+1)),
  4883. lambda x, c: sc.gammaincc(c, np.exp(x)))
  4884. def _isf(self, q, c):
  4885. # The expression used when g < _XMIN inverts the complement of
  4886. # the one term expansion given in the comments of _cdf().
  4887. g = sc.gammainccinv(c, q)
  4888. return xpx.apply_where(
  4889. g < _XMIN, (g, q, c),
  4890. lambda g, q, c: (np.log1p(-q) + sc.gammaln(c+1))/c,
  4891. lambda g, q, c: np.log(g))
  4892. def _stats(self, c):
  4893. # See, for example, "A Statistical Study of Log-Gamma Distribution", by
  4894. # Ping Shing Chan (thesis, McMaster University, 1993).
  4895. mean = sc.digamma(c)
  4896. var = sc.polygamma(1, c)
  4897. skewness = sc.polygamma(2, c) / np.power(var, 1.5)
  4898. excess_kurtosis = sc.polygamma(3, c) / (var*var)
  4899. return mean, var, skewness, excess_kurtosis
  4900. def _entropy(self, c):
  4901. def regular(c):
  4902. h = sc.gammaln(c) - c * sc.digamma(c) + c
  4903. return h
  4904. def asymptotic(c):
  4905. # using asymptotic expansions for gammaln and psi (see gh-18093)
  4906. term = -0.5*np.log(c) + c**-1./6 - c**-3./90 + c**-5./210
  4907. h = norm._entropy() + term
  4908. return h
  4909. return xpx.apply_where(c >= 45, c, asymptotic, regular)
# Module-level instance of the log-gamma distribution generator.
loggamma = loggamma_gen(name='loggamma')
  4911. class loglaplace_gen(rv_continuous):
  4912. r"""A log-Laplace continuous random variable.
  4913. %(before_notes)s
  4914. Notes
  4915. -----
  4916. The probability density function for `loglaplace` is:
  4917. .. math::
  4918. f(x, c) = \begin{cases}\frac{c}{2} x^{ c-1} &\text{for } 0 < x < 1\\
  4919. \frac{c}{2} x^{-c-1} &\text{for } x \ge 1
  4920. \end{cases}
  4921. for :math:`c > 0`.
  4922. `loglaplace` takes ``c`` as a shape parameter for :math:`c`.
  4923. %(after_notes)s
  4924. Suppose a random variable ``X`` follows the Laplace distribution with
  4925. location ``a`` and scale ``b``. Then ``Y = exp(X)`` follows the
  4926. log-Laplace distribution with ``c = 1 / b`` and ``scale = exp(a)``.
  4927. References
  4928. ----------
  4929. T.J. Kozubowski and K. Podgorski, "A log-Laplace growth rate model",
  4930. The Mathematical Scientist, vol. 28, pp. 49-60, 2003.
  4931. %(example)s
  4932. """
  4933. def _shape_info(self):
  4934. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  4935. def _pdf(self, x, c):
  4936. # loglaplace.pdf(x, c) = c / 2 * x**(c-1), for 0 < x < 1
  4937. # = c / 2 * x**(-c-1), for x >= 1
  4938. cd2 = c/2.0
  4939. c = np.where(x < 1, c, -c)
  4940. return cd2*x**(c-1)
  4941. def _cdf(self, x, c):
  4942. return np.where(x < 1, 0.5*x**c, 1-0.5*x**(-c))
  4943. def _sf(self, x, c):
  4944. return np.where(x < 1, 1 - 0.5*x**c, 0.5*x**(-c))
  4945. def _ppf(self, q, c):
  4946. return np.where(q < 0.5, (2.0*q)**(1.0/c), (2*(1.0-q))**(-1.0/c))
  4947. def _isf(self, q, c):
  4948. return np.where(q > 0.5, (2.0*(1.0 - q))**(1.0/c), (2*q)**(-1.0/c))
  4949. def _munp(self, n, c):
  4950. with np.errstate(divide='ignore'):
  4951. c2, n2 = c**2, n**2
  4952. return np.where(n2 < c2, c2 / (c2 - n2), np.inf)
  4953. def _entropy(self, c):
  4954. return np.log(2.0/c) + 1.0
  4955. @_call_super_mom
  4956. @inherit_docstring_from(rv_continuous)
  4957. def fit(self, data, *args, **kwds):
  4958. data, fc, floc, fscale = _check_fit_input_parameters(self, data,
  4959. args, kwds)
  4960. # Specialize MLE only when location is known.
  4961. if floc is None:
  4962. return super(type(self), self).fit(data, *args, **kwds)
  4963. # Raise an error if any observation has zero likelihood.
  4964. if np.any(data <= floc):
  4965. raise FitDataError("loglaplace", lower=floc, upper=np.inf)
  4966. # Remove location from data.
  4967. if floc != 0:
  4968. data = data - floc
  4969. # When location is zero, the log-Laplace distribution is related to
  4970. # the Laplace distribution in that if X ~ Laplace(loc=a, scale=b),
  4971. # then Y = exp(X) ~ LogLaplace(c=1/b, loc=0, scale=exp(a)). It can
  4972. # be shown that the MLE for Y is the same as the MLE for X = ln(Y).
  4973. # Therefore, we reuse the formulas from laplace.fit() and transform
  4974. # the result back into log-laplace's parameter space.
  4975. a, b = laplace.fit(np.log(data),
  4976. floc=np.log(fscale) if fscale is not None else None,
  4977. fscale=1/fc if fc is not None else None,
  4978. method='mle')
  4979. loc = floc
  4980. scale = np.exp(a) if fscale is None else fscale
  4981. c = 1 / b if fc is None else fc
  4982. return c, loc, scale
  4983. loglaplace = loglaplace_gen(a=0.0, name='loglaplace')
  4984. def _lognorm_logpdf(x, s):
  4985. return xpx.apply_where(
  4986. x != 0, (x, s),
  4987. lambda x, s: (-np.log(x)**2 / (2 * s**2)
  4988. - np.log(s * x * np.sqrt(2 * np.pi))),
  4989. fill_value=-np.inf)
class lognorm_gen(rv_continuous):
    r"""A lognormal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `lognorm` is:

    .. math::

        f(x, s) = \frac{1}{s x \sqrt{2\pi}}
                  \exp\left(-\frac{\log^2(x)}{2s^2}\right)

    for :math:`x > 0`, :math:`s > 0`.

    `lognorm` takes ``s`` as a shape parameter for :math:`s`.

    %(after_notes)s

    Suppose a normally distributed random variable ``X`` has mean ``mu`` and
    standard deviation ``sigma``. Then ``Y = exp(X)`` is lognormally
    distributed with ``s = sigma`` and ``scale = exp(mu)``.

    %(example)s

    The logarithm of a log-normally distributed random variable is
    normally distributed:

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> fig, ax = plt.subplots(1, 1)
    >>> mu, sigma = 2, 0.5
    >>> X = stats.norm(loc=mu, scale=sigma)
    >>> Y = stats.lognorm(s=sigma, scale=np.exp(mu))
    >>> x = np.linspace(*X.interval(0.999))
    >>> y = Y.rvs(size=10000)
    >>> ax.plot(x, X.pdf(x), label='X (pdf)')
    >>> ax.hist(np.log(y), density=True, bins=x, label='log(Y) (histogram)')
    >>> ax.legend()
    >>> plt.show()

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        return [_ShapeInfo("s", False, (0, np.inf), (False, False))]

    def _rvs(self, s, size=None, random_state=None):
        # exp of a scaled standard normal sample
        return np.exp(s * random_state.standard_normal(size))

    def _pdf(self, x, s):
        # lognorm.pdf(x, s) = 1 / (s*x*sqrt(2*pi)) * exp(-1/2*(log(x)/s)**2)
        return np.exp(self._logpdf(x, s))

    def _logpdf(self, x, s):
        return _lognorm_logpdf(x, s)

    # The CDF/SF family below maps x to log(x)/s and defers to the standard
    # normal helpers; the quantile functions apply the inverse mapping.
    def _cdf(self, x, s):
        return _norm_cdf(np.log(x) / s)

    def _logcdf(self, x, s):
        return _norm_logcdf(np.log(x) / s)

    def _ppf(self, q, s):
        return np.exp(s * _norm_ppf(q))

    def _sf(self, x, s):
        return _norm_sf(np.log(x) / s)

    def _logsf(self, x, s):
        return _norm_logsf(np.log(x) / s)

    def _isf(self, q, s):
        return np.exp(s * _norm_isf(q))

    def _stats(self, s):
        # All four returned values are expressed through p = exp(s**2).
        p = np.exp(s*s)
        mu = np.sqrt(p)
        mu2 = p*(p-1)
        g1 = np.sqrt(p-1)*(2+p)
        # polynomial p**4 + 2*p**3 + 3*p**2 - 6
        g2 = np.polyval([1, 2, 3, 0, -6.0], p)
        return mu, mu2, g1, g2

    def _entropy(self, s):
        return 0.5 * (1 + np.log(2*np.pi) + 2 * np.log(s))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        When `method='MLE'` and
        the location parameter is fixed by using the `floc` argument,
        this function uses explicit formulas for the maximum likelihood
        estimation of the log-normal shape and scale parameters, so the
        `optimizer`, `loc` and `scale` keyword arguments are ignored.
        If the location is free, a likelihood maximum is found by
        setting its partial derivative wrt to location to 0, and
        solving by substituting the analytical expressions of shape
        and scale (or provided parameters).
        See, e.g., equation 3.1 in
        A. Clifford Cohen & Betty Jones Whitten (1980)
        Estimation in the Three-Parameter Lognormal Distribution,
        Journal of the American Statistical Association, 75:370, 399-404
        https://doi.org/10.2307/2287466
        \n\n""")
    def fit(self, data, *args, **kwds):
        # `superfit=True` bypasses this override and goes straight to the
        # parent implementation; the key is removed from kwds before the
        # parent sees it.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        parameters = _check_fit_input_parameters(self, data, args, kwds)
        data, fshape, floc, fscale = parameters
        data_min = np.min(data)

        def get_shape_scale(loc):
            # Calculate maximum likelihood scale and shape with analytical
            # formulas unless provided by the user
            # NOTE: `lndata` is only bound when at least one of shape/scale
            # is free.  When both are fixed (and truthy), the `or`
            # expressions below short-circuit and never touch `lndata`.
            if fshape is None or fscale is None:
                lndata = np.log(data - loc)
            scale = fscale or np.exp(lndata.mean())
            shape = fshape or np.sqrt(np.mean((lndata - np.log(scale))**2))
            return shape, scale

        def dL_dLoc(loc):
            # Derivative of (positive) LL w.r.t. loc
            shape, scale = get_shape_scale(loc)
            shifted = data - loc
            return np.sum((1 + np.log(shifted/scale)/shape**2)/shifted)

        def ll(loc):
            # (Positive) log-likelihood
            shape, scale = get_shape_scale(loc)
            return -self.nnlf((shape, loc, scale), data)

        if floc is None:
            # The location must be less than the minimum of the data.
            # Back off a bit to avoid numerical issues.
            spacing = np.spacing(data_min)
            rbrack = data_min - spacing

            # Find the right end of the bracket by successive doubling of the
            # distance to data_min. We're interested in a maximum LL, so the
            # slope dL_dLoc_rbrack should be negative at the right end.
            # optimization for later: share shape, scale
            dL_dLoc_rbrack = dL_dLoc(rbrack)
            # Log-likelihood at the initial right bracket, kept for the
            # comparison against the root at the end of this branch.
            ll_rbrack = ll(rbrack)
            delta = 2 * spacing  # 2 * (data_min - rbrack)
            while dL_dLoc_rbrack >= -1e-6:
                rbrack = data_min - delta
                dL_dLoc_rbrack = dL_dLoc(rbrack)
                delta *= 2

            if not np.isfinite(rbrack) or not np.isfinite(dL_dLoc_rbrack):
                # If we never find a negative slope, either we missed it or the
                # slope is always positive. It's usually the latter,
                # which means
                #   loc = data_min - spacing
                # But sometimes when shape and/or scale are fixed there are
                # other issues, so be cautious.
                return super().fit(data, *args, **kwds)

            # Now find the left end of the bracket. Guess is `rbrack-1`
            # unless that is too small of a difference to resolve. Double
            # the size of the interval until the left end is found.
            lbrack = np.minimum(np.nextafter(rbrack, -np.inf), rbrack-1)
            dL_dLoc_lbrack = dL_dLoc(lbrack)
            delta = 2 * (rbrack - lbrack)
            while (np.isfinite(lbrack) and np.isfinite(dL_dLoc_lbrack)
                   and np.sign(dL_dLoc_lbrack) == np.sign(dL_dLoc_rbrack)):
                lbrack = rbrack - delta
                dL_dLoc_lbrack = dL_dLoc(lbrack)
                delta *= 2

            # I don't recall observing this, but just in case...
            if not np.isfinite(lbrack) or not np.isfinite(dL_dLoc_lbrack):
                return super().fit(data, *args, **kwds)

            # If we have a valid bracket, find the root
            res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
            if not res.converged:
                return super().fit(data, *args, **kwds)

            # If the slope was positive near the minimum of the data,
            # the maximum LL could be there instead of at the root. Compare
            # the LL of the two points to decide.
            ll_root = ll(res.root)
            loc = res.root if ll_root > ll_rbrack else data_min-spacing

        else:
            # A fixed location must lie strictly below every observation.
            if floc >= data_min:
                raise FitDataError("lognorm", lower=0., upper=np.inf)
            loc = floc

        shape, scale = get_shape_scale(loc)
        # Fall back to the parent fit if the closed-form estimates are not
        # valid parameters.
        if not (self._argcheck(shape) and scale > 0):
            return super().fit(data, *args, **kwds)
        return shape, loc, scale


lognorm = lognorm_gen(a=0.0, name='lognorm')
  5149. class gibrat_gen(rv_continuous):
  5150. r"""A Gibrat continuous random variable.
  5151. %(before_notes)s
  5152. Notes
  5153. -----
  5154. The probability density function for `gibrat` is:
  5155. .. math::
  5156. f(x) = \frac{1}{x \sqrt{2\pi}} \exp(-\frac{1}{2} (\log(x))^2)
  5157. for :math:`x >= 0`.
  5158. `gibrat` is a special case of `lognorm` with ``s=1``.
  5159. %(after_notes)s
  5160. %(example)s
  5161. """
  5162. _support_mask = rv_continuous._open_support_mask
  5163. def _shape_info(self):
  5164. return []
  5165. def _rvs(self, size=None, random_state=None):
  5166. return np.exp(random_state.standard_normal(size))
  5167. def _pdf(self, x):
  5168. # gibrat.pdf(x) = 1/(x*sqrt(2*pi)) * exp(-1/2*(log(x))**2)
  5169. return np.exp(self._logpdf(x))
  5170. def _logpdf(self, x):
  5171. return _lognorm_logpdf(x, 1.0)
  5172. def _cdf(self, x):
  5173. return _norm_cdf(np.log(x))
  5174. def _ppf(self, q):
  5175. return np.exp(_norm_ppf(q))
  5176. def _sf(self, x):
  5177. return _norm_sf(np.log(x))
  5178. def _isf(self, p):
  5179. return np.exp(_norm_isf(p))
  5180. def _stats(self):
  5181. p = np.e
  5182. mu = np.sqrt(p)
  5183. mu2 = p * (p - 1)
  5184. g1 = np.sqrt(p - 1) * (2 + p)
  5185. g2 = np.polyval([1, 2, 3, 0, -6.0], p)
  5186. return mu, mu2, g1, g2
  5187. def _entropy(self):
  5188. return 0.5 * np.log(2 * np.pi) + 0.5
  5189. gibrat = gibrat_gen(a=0.0, name='gibrat')
  5190. class maxwell_gen(rv_continuous):
  5191. r"""A Maxwell continuous random variable.
  5192. %(before_notes)s
  5193. Notes
  5194. -----
  5195. A special case of a `chi` distribution, with ``df=3``, ``loc=0.0``,
  5196. and given ``scale = a``, where ``a`` is the parameter used in the
  5197. Mathworld description [1]_.
  5198. The probability density function for `maxwell` is:
  5199. .. math::
  5200. f(x) = \sqrt{2/\pi}x^2 \exp(-x^2/2)
  5201. for :math:`x >= 0`.
  5202. %(after_notes)s
  5203. References
  5204. ----------
  5205. .. [1] http://mathworld.wolfram.com/MaxwellDistribution.html
  5206. %(example)s
  5207. """
  5208. def _shape_info(self):
  5209. return []
  5210. def _rvs(self, size=None, random_state=None):
  5211. return chi.rvs(3.0, size=size, random_state=random_state)
  5212. def _pdf(self, x):
  5213. # maxwell.pdf(x) = sqrt(2/pi)x**2 * exp(-x**2/2)
  5214. return _SQRT_2_OVER_PI*x*x*np.exp(-x*x/2.0)
  5215. def _logpdf(self, x):
  5216. # Allow x=0 without 'divide by zero' warnings
  5217. with np.errstate(divide='ignore'):
  5218. return _LOG_SQRT_2_OVER_PI + 2*np.log(x) - 0.5*x*x
  5219. def _cdf(self, x):
  5220. return sc.gammainc(1.5, x*x/2.0)
  5221. def _ppf(self, q):
  5222. return np.sqrt(2*sc.gammaincinv(1.5, q))
  5223. def _sf(self, x):
  5224. return sc.gammaincc(1.5, x*x/2.0)
  5225. def _isf(self, q):
  5226. return np.sqrt(2*sc.gammainccinv(1.5, q))
  5227. def _stats(self):
  5228. val = 3*np.pi-8
  5229. return (2*np.sqrt(2.0/np.pi),
  5230. 3-8/np.pi,
  5231. np.sqrt(2)*(32-10*np.pi)/val**1.5,
  5232. (-12*np.pi*np.pi + 160*np.pi - 384) / val**2.0)
  5233. def _entropy(self):
  5234. return _EULER + 0.5*np.log(2*np.pi)-0.5
  5235. maxwell = maxwell_gen(a=0.0, name='maxwell')
  5236. class mielke_gen(rv_continuous):
  5237. r"""A Mielke Beta-Kappa / Dagum continuous random variable.
  5238. %(before_notes)s
  5239. Notes
  5240. -----
  5241. The probability density function for `mielke` is:
  5242. .. math::
  5243. f(x, k, s) = \frac{k x^{k-1}}{(1+x^s)^{1+k/s}}
  5244. for :math:`x > 0` and :math:`k, s > 0`. The distribution is sometimes
  5245. called Dagum distribution ([2]_). It was already defined in [3]_, called
  5246. a Burr Type III distribution (`burr` with parameters ``c=s`` and
  5247. ``d=k/s``).
  5248. `mielke` takes ``k`` and ``s`` as shape parameters.
  5249. %(after_notes)s
  5250. References
  5251. ----------
  5252. .. [1] Mielke, P.W., 1973 "Another Family of Distributions for Describing
  5253. and Analyzing Precipitation Data." J. Appl. Meteor., 12, 275-280
  5254. .. [2] Dagum, C., 1977 "A new model for personal income distribution."
  5255. Economie Appliquee, 33, 327-367.
  5256. .. [3] Burr, I. W. "Cumulative frequency functions", Annals of
  5257. Mathematical Statistics, 13(2), pp 215-232 (1942).
  5258. %(example)s
  5259. """
  5260. def _shape_info(self):
  5261. ik = _ShapeInfo("k", False, (0, np.inf), (False, False))
  5262. i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
  5263. return [ik, i_s]
  5264. def _pdf(self, x, k, s):
  5265. return k*x**(k-1.0) / (1.0+x**s)**(1.0+k*1.0/s)
  5266. def _logpdf(self, x, k, s):
  5267. # Allow x=0 without 'divide by zero' warnings.
  5268. with np.errstate(divide='ignore'):
  5269. return np.log(k) + np.log(x)*(k - 1) - np.log1p(x**s)*(1 + k/s)
  5270. def _cdf(self, x, k, s):
  5271. return x**k / (1.0+x**s)**(k*1.0/s)
  5272. def _ppf(self, q, k, s):
  5273. qsk = pow(q, s*1.0/k)
  5274. return pow(qsk/(1.0-qsk), 1.0/s)
  5275. def _munp(self, n, k, s):
  5276. def nth_moment(n, k, s):
  5277. # n-th moment is defined for -k < n < s
  5278. return sc.gamma((k+n)/s)*sc.gamma(1-n/s)/sc.gamma(k/s)
  5279. return xpx.apply_where(n < s, (n, k, s), nth_moment, fill_value=np.inf)
  5280. mielke = mielke_gen(a=0.0, name='mielke')
  5281. class kappa4_gen(rv_continuous):
  5282. r"""Kappa 4 parameter distribution.
  5283. %(before_notes)s
  5284. Notes
  5285. -----
  5286. The probability density function for kappa4 is:
  5287. .. math::
  5288. f(x, h, k) = (1 - k x)^{1/k - 1} (1 - h (1 - k x)^{1/k})^{1/h-1}
  5289. if :math:`h` and :math:`k` are not equal to 0.
  5290. If :math:`h` or :math:`k` are zero then the pdf can be simplified:
  5291. :math:`h = 0` and :math:`k \neq 0`::
  5292. kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  5293. exp(-(1.0 - k*x)**(1.0/k))
  5294. :math:`h \neq 0` and :math:`k = 0`::
  5295. kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
  5296. :math:`h = 0` and :math:`k = 0`::
  5297. kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))
  5298. kappa4 takes :math:`h` and :math:`k` as shape parameters.
  5299. The kappa4 distribution returns other distributions when certain
  5300. :math:`h` and :math:`k` values are used.
  5301. +------+-------------+----------------+------------------+
  5302. | h | k=0.0 | k=1.0 | -inf<=k<=inf |
  5303. +======+=============+================+==================+
  5304. | -1.0 | Logistic | | Generalized |
  5305. | | | | Logistic(1) |
  5306. | | | | |
  5307. | | logistic(x) | | |
  5308. +------+-------------+----------------+------------------+
  5309. | 0.0 | Gumbel | Reverse | Generalized |
  5310. | | | Exponential(2) | Extreme Value |
  5311. | | | | |
  5312. | | gumbel_r(x) | | genextreme(x, k) |
  5313. +------+-------------+----------------+------------------+
  5314. | 1.0 | Exponential | Uniform | Generalized |
  5315. | | | | Pareto |
  5316. | | | | |
  5317. | | expon(x) | uniform(x) | genpareto(x, -k) |
  5318. +------+-------------+----------------+------------------+
  5319. (1) There are at least five generalized logistic distributions.
  5320. Four are described here:
  5321. https://en.wikipedia.org/wiki/Generalized_logistic_distribution
  5322. The "fifth" one is the one kappa4 should match which currently
  5323. isn't implemented in scipy:
  5324. https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
  5325. https://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
  5326. (2) This distribution is currently not in scipy.
  5327. References
  5328. ----------
  5329. J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
  5330. to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
  5331. Faculty of the Louisiana State University and Agricultural and Mechanical
  5332. College, (August, 2004),
  5333. https://digitalcommons.lsu.edu/gradschool_dissertations/3672
  5334. J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
  5335. Develop. 38 (3), 25 1-258 (1994).
  5336. B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
  5337. Site in the Chi River Basin, Thailand", Journal of Water Resource and
  5338. Protection, vol. 4, 866-869, (2012).
  5339. :doi:`10.4236/jwarp.2012.410101`
  5340. C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
  5341. Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
  5342. 2000).
  5343. http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf
  5344. %(after_notes)s
  5345. %(example)s
  5346. """
  5347. def _argcheck(self, h, k):
  5348. shape = np.broadcast_arrays(h, k)[0].shape
  5349. return np.full(shape, fill_value=True)
  5350. def _shape_info(self):
  5351. ih = _ShapeInfo("h", False, (-np.inf, np.inf), (False, False))
  5352. ik = _ShapeInfo("k", False, (-np.inf, np.inf), (False, False))
  5353. return [ih, ik]
  5354. def _get_support(self, h, k):
  5355. condlist = [np.logical_and(h > 0, k > 0),
  5356. np.logical_and(h > 0, k == 0),
  5357. np.logical_and(h > 0, k < 0),
  5358. np.logical_and(h <= 0, k > 0),
  5359. np.logical_and(h <= 0, k == 0),
  5360. np.logical_and(h <= 0, k < 0)]
  5361. def f0(h, k):
  5362. return (1.0 - np.float_power(h, -k))/k
  5363. def f1(h, k):
  5364. return np.log(h)
  5365. def f3(h, k):
  5366. a = np.empty(np.shape(h))
  5367. a[:] = -np.inf
  5368. return a
  5369. def f5(h, k):
  5370. return 1.0/k
  5371. _a = _lazyselect(condlist,
  5372. [f0, f1, f0, f3, f3, f5],
  5373. [h, k],
  5374. default=np.nan)
  5375. def f0(h, k):
  5376. return 1.0/k
  5377. def f1(h, k):
  5378. a = np.empty(np.shape(h))
  5379. a[:] = np.inf
  5380. return a
  5381. _b = _lazyselect(condlist,
  5382. [f0, f1, f1, f0, f1, f1],
  5383. [h, k],
  5384. default=np.nan)
  5385. return _a, _b
  5386. def _pdf(self, x, h, k):
  5387. # kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  5388. # (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
  5389. return np.exp(self._logpdf(x, h, k))
  5390. def _logpdf(self, x, h, k):
  5391. condlist = [np.logical_and(h != 0, k != 0),
  5392. np.logical_and(h == 0, k != 0),
  5393. np.logical_and(h != 0, k == 0),
  5394. np.logical_and(h == 0, k == 0)]
  5395. def f0(x, h, k):
  5396. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
  5397. 1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
  5398. logpdf = ...
  5399. '''
  5400. return (sc.xlog1py(1.0/k - 1.0, -k*x) +
  5401. sc.xlog1py(1.0/h - 1.0, -h*(1.0 - k*x)**(1.0/k)))
  5402. def f1(x, h, k):
  5403. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*np.exp(-(
  5404. 1.0 - k*x)**(1.0/k))
  5405. logpdf = ...
  5406. '''
  5407. return sc.xlog1py(1.0/k - 1.0, -k*x) - (1.0 - k*x)**(1.0/k)
  5408. def f2(x, h, k):
  5409. '''pdf = np.exp(-x)*(1.0 - h*np.exp(-x))**(1.0/h - 1.0)
  5410. logpdf = ...
  5411. '''
  5412. return -x + sc.xlog1py(1.0/h - 1.0, -h*np.exp(-x))
  5413. def f3(x, h, k):
  5414. '''pdf = np.exp(-x-np.exp(-x))
  5415. logpdf = ...
  5416. '''
  5417. return -x - np.exp(-x)
  5418. return _lazyselect(condlist,
  5419. [f0, f1, f2, f3],
  5420. [x, h, k],
  5421. default=np.nan)
  5422. def _cdf(self, x, h, k):
  5423. return np.exp(self._logcdf(x, h, k))
  5424. def _logcdf(self, x, h, k):
  5425. condlist = [np.logical_and(h != 0, k != 0),
  5426. np.logical_and(h == 0, k != 0),
  5427. np.logical_and(h != 0, k == 0),
  5428. np.logical_and(h == 0, k == 0)]
  5429. def f0(x, h, k):
  5430. '''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
  5431. logcdf = ...
  5432. '''
  5433. return (1.0/h)*sc.log1p(-h*(1.0 - k*x)**(1.0/k))
  5434. def f1(x, h, k):
  5435. '''cdf = np.exp(-(1.0 - k*x)**(1.0/k))
  5436. logcdf = ...
  5437. '''
  5438. return -(1.0 - k*x)**(1.0/k)
  5439. def f2(x, h, k):
  5440. '''cdf = (1.0 - h*np.exp(-x))**(1.0/h)
  5441. logcdf = ...
  5442. '''
  5443. return (1.0/h)*sc.log1p(-h*np.exp(-x))
  5444. def f3(x, h, k):
  5445. '''cdf = np.exp(-np.exp(-x))
  5446. logcdf = ...
  5447. '''
  5448. return -np.exp(-x)
  5449. return _lazyselect(condlist,
  5450. [f0, f1, f2, f3],
  5451. [x, h, k],
  5452. default=np.nan)
  5453. def _ppf(self, q, h, k):
  5454. condlist = [np.logical_and(h != 0, k != 0),
  5455. np.logical_and(h == 0, k != 0),
  5456. np.logical_and(h != 0, k == 0),
  5457. np.logical_and(h == 0, k == 0)]
  5458. def f0(q, h, k):
  5459. return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)
  5460. def f1(q, h, k):
  5461. return 1.0/k*(1.0 - (-np.log(q))**k)
  5462. def f2(q, h, k):
  5463. '''ppf = -np.log((1.0 - (q**h))/h)
  5464. '''
  5465. return -sc.log1p(-(q**h)) + np.log(h)
  5466. def f3(q, h, k):
  5467. return -np.log(-np.log(q))
  5468. return _lazyselect(condlist,
  5469. [f0, f1, f2, f3],
  5470. [q, h, k],
  5471. default=np.nan)
  5472. def _get_stats_info(self, h, k):
  5473. condlist = [
  5474. np.logical_and(h < 0, k >= 0),
  5475. k < 0,
  5476. ]
  5477. def f0(h, k):
  5478. return (-1.0/h*k).astype(int)
  5479. def f1(h, k):
  5480. return (-1.0/k).astype(int)
  5481. return _lazyselect(condlist, [f0, f1], [h, k], default=5)
  5482. def _stats(self, h, k):
  5483. maxr = self._get_stats_info(h, k)
  5484. outputs = [None if np.any(r < maxr) else np.nan for r in range(1, 5)]
  5485. return outputs[:]
  5486. def _mom1_sc(self, m, *args):
  5487. maxr = self._get_stats_info(args[0], args[1])
  5488. if m >= maxr:
  5489. return np.nan
  5490. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  5491. kappa4 = kappa4_gen(name='kappa4')
  5492. class kappa3_gen(rv_continuous):
  5493. r"""Kappa 3 parameter distribution.
  5494. %(before_notes)s
  5495. Notes
  5496. -----
  5497. The probability density function for `kappa3` is:
  5498. .. math::
  5499. f(x, a) = a (a + x^a)^{-(a + 1)/a}
  5500. for :math:`x > 0` and :math:`a > 0`.
  5501. `kappa3` takes ``a`` as a shape parameter for :math:`a`.
  5502. References
  5503. ----------
  5504. P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
  5505. Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
  5506. 701-707, (September, 1973),
  5507. :doi:`10.1175/1520-0493(1973)101<0701:TKDMLE>2.3.CO;2`
  5508. B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
  5509. Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
  5510. 415-419 (2012), :doi:`10.4236/ojs.2012.24050`
  5511. %(after_notes)s
  5512. %(example)s
  5513. """
  5514. def _shape_info(self):
  5515. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  5516. def _pdf(self, x, a):
  5517. # kappa3.pdf(x, a) = a*(a + x**a)**(-(a + 1)/a), for x > 0
  5518. return a*(a + x**a)**(-1.0/a-1)
  5519. def _cdf(self, x, a):
  5520. return x*(a + x**a)**(-1.0/a)
  5521. def _sf(self, x, a):
  5522. x, a = np.broadcast_arrays(x, a) # some code paths pass scalars
  5523. sf = super()._sf(x, a)
  5524. # When the SF is small, another formulation is typically more accurate.
  5525. # However, it blows up for large `a`, so use it only if it also returns
  5526. # a small value of the SF.
  5527. cutoff = 0.01
  5528. i = sf < cutoff
  5529. sf2 = -sc.expm1(sc.xlog1py(-1.0 / a[i], a[i] * x[i]**-a[i]))
  5530. i2 = sf2 > cutoff
  5531. sf2[i2] = sf[i][i2] # replace bad values with original values
  5532. sf[i] = sf2
  5533. return sf
  5534. def _ppf(self, q, a):
  5535. return (a/(q**-a - 1.0))**(1.0/a)
  5536. def _isf(self, q, a):
  5537. lg = sc.xlog1py(-a, -q)
  5538. denom = sc.expm1(lg)
  5539. return (a / denom)**(1.0 / a)
  5540. def _stats(self, a):
  5541. outputs = [None if np.any(i < a) else np.nan for i in range(1, 5)]
  5542. return outputs[:]
  5543. def _mom1_sc(self, m, *args):
  5544. if np.any(m >= args[0]):
  5545. return np.nan
  5546. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  5547. kappa3 = kappa3_gen(a=0.0, name='kappa3')
  5548. class moyal_gen(rv_continuous):
  5549. r"""A Moyal continuous random variable.
  5550. %(before_notes)s
  5551. Notes
  5552. -----
  5553. The probability density function for `moyal` is:
  5554. .. math::
  5555. f(x) = \exp(-(x + \exp(-x))/2) / \sqrt{2\pi}
  5556. for a real number :math:`x`.
  5557. %(after_notes)s
  5558. This distribution has utility in high-energy physics and radiation
  5559. detection. It describes the energy loss of a charged relativistic
  5560. particle due to ionization of the medium [1]_. It also provides an
  5561. approximation for the Landau distribution. For an in depth description
  5562. see [2]_. For additional description, see [3]_.
  5563. References
  5564. ----------
  5565. .. [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
  5566. The London, Edinburgh, and Dublin Philosophical Magazine
  5567. and Journal of Science, vol 46, 263-280, (1955).
  5568. :doi:`10.1080/14786440308521076` (gated)
  5569. .. [2] G. Cordeiro et al., "The beta Moyal: A useful skew distribution",
  5570. International Journal of Research and Reviews in Applied Sciences,
  5571. vol 10, 171-192, (2012).
  5572. https://www.arpapress.com/files/volumes/vol10issue2/ijrras_10_2_02.pdf
  5573. .. [3] C. Walck, "Handbook on Statistical Distributions for
  5574. Experimentalists; International Report SUF-PFY/96-01", Chapter 26,
  5575. University of Stockholm: Stockholm, Sweden, (2007).
  5576. http://www.stat.rice.edu/~dobelman/textfiles/DistributionsHandbook.pdf
  5577. .. versionadded:: 1.1.0
  5578. %(example)s
  5579. """
  5580. def _shape_info(self):
  5581. return []
  5582. def _rvs(self, size=None, random_state=None):
  5583. u1 = gamma.rvs(a=0.5, scale=2, size=size,
  5584. random_state=random_state)
  5585. return -np.log(u1)
  5586. def _pdf(self, x):
  5587. return np.exp(-0.5 * (x + np.exp(-x))) / np.sqrt(2*np.pi)
  5588. def _cdf(self, x):
  5589. return sc.erfc(np.exp(-0.5 * x) / np.sqrt(2))
  5590. def _sf(self, x):
  5591. return sc.erf(np.exp(-0.5 * x) / np.sqrt(2))
  5592. def _ppf(self, x):
  5593. return -np.log(2 * sc.erfcinv(x)**2)
  5594. def _stats(self):
  5595. mu = np.log(2) + np.euler_gamma
  5596. mu2 = np.pi**2 / 2
  5597. g1 = 28 * np.sqrt(2) * sc.zeta(3) / np.pi**3
  5598. g2 = 4.
  5599. return mu, mu2, g1, g2
  5600. def _munp(self, n):
  5601. if n == 1.0:
  5602. return np.log(2) + np.euler_gamma
  5603. elif n == 2.0:
  5604. return np.pi**2 / 2 + (np.log(2) + np.euler_gamma)**2
  5605. elif n == 3.0:
  5606. tmp1 = 1.5 * np.pi**2 * (np.log(2)+np.euler_gamma)
  5607. tmp2 = (np.log(2)+np.euler_gamma)**3
  5608. tmp3 = 14 * sc.zeta(3)
  5609. return tmp1 + tmp2 + tmp3
  5610. elif n == 4.0:
  5611. tmp1 = 4 * 14 * sc.zeta(3) * (np.log(2) + np.euler_gamma)
  5612. tmp2 = 3 * np.pi**2 * (np.log(2) + np.euler_gamma)**2
  5613. tmp3 = (np.log(2) + np.euler_gamma)**4
  5614. tmp4 = 7 * np.pi**4 / 4
  5615. return tmp1 + tmp2 + tmp3 + tmp4
  5616. else:
  5617. # return generic for higher moments
  5618. # return rv_continuous._mom1_sc(self, n, b)
  5619. return self._mom1_sc(n)
  5620. moyal = moyal_gen(name="moyal")
  5621. class nakagami_gen(rv_continuous):
  5622. r"""A Nakagami continuous random variable.
  5623. %(before_notes)s
  5624. Notes
  5625. -----
  5626. The probability density function for `nakagami` is:
  5627. .. math::
  5628. f(x, \nu) = \frac{2 \nu^\nu}{\Gamma(\nu)} x^{2\nu-1} \exp(-\nu x^2)
  5629. for :math:`x >= 0`, :math:`\nu > 0`. The distribution was introduced in
  5630. [2]_, see also [1]_ for further information.
  5631. `nakagami` takes ``nu`` as a shape parameter for :math:`\nu`.
  5632. %(after_notes)s
  5633. References
  5634. ----------
  5635. .. [1] "Nakagami distribution", Wikipedia
  5636. https://en.wikipedia.org/wiki/Nakagami_distribution
  5637. .. [2] M. Nakagami, "The m-distribution - A general formula of intensity
  5638. distribution of rapid fading", Statistical methods in radio wave
  5639. propagation, Pergamon Press, 1960, 3-36.
  5640. :doi:`10.1016/B978-0-08-009306-2.50005-4`
  5641. %(example)s
  5642. """
  5643. def _argcheck(self, nu):
  5644. return nu > 0
  5645. def _shape_info(self):
  5646. return [_ShapeInfo("nu", False, (0, np.inf), (False, False))]
  5647. def _pdf(self, x, nu):
  5648. return np.exp(self._logpdf(x, nu))
  5649. def _logpdf(self, x, nu):
  5650. # nakagami.pdf(x, nu) = 2 * nu**nu / gamma(nu) *
  5651. # x**(2*nu-1) * exp(-nu*x**2)
  5652. return (np.log(2) + sc.xlogy(nu, nu) - sc.gammaln(nu) +
  5653. sc.xlogy(2*nu - 1, x) - nu*x**2)
  5654. def _cdf(self, x, nu):
  5655. return sc.gammainc(nu, nu*x*x)
  5656. def _ppf(self, q, nu):
  5657. return np.sqrt(1.0/nu*sc.gammaincinv(nu, q))
  5658. def _sf(self, x, nu):
  5659. return sc.gammaincc(nu, nu*x*x)
  5660. def _isf(self, p, nu):
  5661. return np.sqrt(1/nu * sc.gammainccinv(nu, p))
  5662. def _stats(self, nu):
  5663. mu = sc.poch(nu, 0.5)/np.sqrt(nu)
  5664. mu2 = 1.0-mu*mu
  5665. g1 = mu * (1 - 4*nu*mu2) / 2.0 / nu / np.power(mu2, 1.5)
  5666. g2 = -6*mu**4*nu + (8*nu-2)*mu**2-2*nu + 1
  5667. g2 /= nu*mu2**2.0
  5668. return mu, mu2, g1, g2
  5669. def _entropy(self, nu):
  5670. shape = np.shape(nu)
  5671. # because somehow this isn't taken care of by the infrastructure...
  5672. nu = np.atleast_1d(nu)
  5673. A = sc.gammaln(nu)
  5674. B = nu - (nu - 0.5) * sc.digamma(nu)
  5675. C = -0.5 * np.log(nu) - np.log(2)
  5676. h = A + B + C
  5677. # This is the asymptotic sum of A and B (see gh-17868)
  5678. norm_entropy = stats.norm._entropy()
  5679. # Above, this is lost to rounding error for large nu, so use the
  5680. # asymptotic sum when the approximation becomes accurate
  5681. i = nu > 5e4 # roundoff error ~ approximation error
  5682. # -1 / (12 * nu) is the O(1/nu) term; see gh-17929
  5683. h[i] = C[i] + norm_entropy - 1/(12*nu[i])
  5684. return h.reshape(shape)[()]
  5685. def _rvs(self, nu, size=None, random_state=None):
  5686. # this relationship can be found in [1] or by a direct calculation
  5687. return np.sqrt(random_state.standard_gamma(nu, size=size) / nu)
  5688. def _fitstart(self, data, args=None):
  5689. if isinstance(data, CensoredData):
  5690. data = data._uncensor()
  5691. if args is None:
  5692. args = (1.0,) * self.numargs
  5693. # Analytical justified estimates
  5694. # see: https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_nakagami.html
  5695. loc = np.min(data)
  5696. scale = np.sqrt(np.sum((data - loc)**2) / len(data))
  5697. return args + (loc, scale)
  5698. nakagami = nakagami_gen(a=0.0, name="nakagami")
  5699. # The function name ncx2 is an abbreviation for noncentral chi squared.
  5700. def _ncx2_log_pdf(x, df, nc):
  5701. # We use (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and include the
  5702. # factor of exp(-xs*ns) into the ive function to improve numerical
  5703. # stability at large values of xs. See also `rice.pdf`.
  5704. df2 = df/2.0 - 1.0
  5705. xs, ns = np.sqrt(x), np.sqrt(nc)
  5706. res = sc.xlogy(df2/2.0, x/nc) - 0.5*(xs - ns)**2
  5707. corr = sc.ive(df2, xs*ns) / 2.0
  5708. # Return res + np.log(corr) avoiding np.log(0)
  5709. return xpx.apply_where(
  5710. corr > 0,
  5711. (res, corr),
  5712. lambda r, c: r + np.log(c),
  5713. fill_value=-np.inf)
  5714. class ncx2_gen(rv_continuous):
  5715. r"""A non-central chi-squared continuous random variable.
  5716. %(before_notes)s
  5717. Notes
  5718. -----
  5719. The probability density function for `ncx2` is:
  5720. .. math::
  5721. f(x, k, \lambda) = \frac{1}{2} \exp(-(\lambda+x)/2)
  5722. (x/\lambda)^{(k-2)/4} I_{(k-2)/2}(\sqrt{\lambda x})
  5723. for :math:`x >= 0`, :math:`k > 0` and :math:`\lambda \ge 0`.
  5724. :math:`k` specifies the degrees of freedom (denoted ``df`` in the
  5725. implementation) and :math:`\lambda` is the non-centrality parameter
  5726. (denoted ``nc`` in the implementation). :math:`I_\nu` denotes the
  5727. modified Bessel function of first order of degree :math:`\nu`
  5728. (`scipy.special.iv`).
  5729. `ncx2` takes ``df`` and ``nc`` as shape parameters.
  5730. This distribution uses routines from the Boost Math C++ library for
  5731. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  5732. methods. [1]_
  5733. %(after_notes)s
  5734. References
  5735. ----------
  5736. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5737. %(example)s
  5738. """
  5739. def _argcheck(self, df, nc):
  5740. return (df > 0) & np.isfinite(df) & (nc >= 0)
  5741. def _shape_info(self):
  5742. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  5743. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  5744. return [idf, inc]
  5745. def _rvs(self, df, nc, size=None, random_state=None):
  5746. return random_state.noncentral_chisquare(df, nc, size)
  5747. def _logpdf(self, x, df, nc):
  5748. return xpx.apply_where(nc != 0, (x, df, nc), _ncx2_log_pdf,
  5749. lambda x, df, _: chi2._logpdf(x, df))
  5750. def _pdf(self, x, df, nc):
  5751. with np.errstate(over='ignore'): # see gh-17432
  5752. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_pdf,
  5753. lambda x, df, _: chi2._pdf(x, df))
  5754. def _cdf(self, x, df, nc):
  5755. with np.errstate(over='ignore'): # see gh-17432
  5756. return xpx.apply_where(nc != 0, (x, df, nc), sc.chndtr,
  5757. lambda x, df, _: chi2._cdf(x, df))
  5758. def _ppf(self, q, df, nc):
  5759. with np.errstate(over='ignore'): # see gh-17432
  5760. return xpx.apply_where(nc != 0, (q, df, nc), sc.chndtrix,
  5761. lambda x, df, _: chi2._ppf(x, df))
  5762. def _sf(self, x, df, nc):
  5763. with np.errstate(over='ignore'): # see gh-17432
  5764. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_sf,
  5765. lambda x, df, _: chi2._sf(x, df))
  5766. def _isf(self, x, df, nc):
  5767. with np.errstate(over='ignore'): # see gh-17432
  5768. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_isf,
  5769. lambda x, df, _: chi2._isf(x, df))
  5770. def _stats(self, df, nc):
  5771. _ncx2_mean = df + nc
  5772. def k_plus_cl(k, l, c):
  5773. return k + c*l
  5774. _ncx2_variance = 2.0 * k_plus_cl(df, nc, 2.0)
  5775. _ncx2_skewness = (np.sqrt(8.0) * k_plus_cl(df, nc, 3) /
  5776. np.sqrt(k_plus_cl(df, nc, 2.0)**3))
  5777. _ncx2_kurtosis_excess = (12.0 * k_plus_cl(df, nc, 4.0) /
  5778. k_plus_cl(df, nc, 2.0)**2)
  5779. return (
  5780. _ncx2_mean,
  5781. _ncx2_variance,
  5782. _ncx2_skewness,
  5783. _ncx2_kurtosis_excess,
  5784. )
  5785. ncx2 = ncx2_gen(a=0.0, name='ncx2')
  5786. class ncf_gen(rv_continuous):
  5787. r"""A non-central F distribution continuous random variable.
  5788. %(before_notes)s
  5789. See Also
  5790. --------
  5791. scipy.stats.f : Fisher distribution
  5792. Notes
  5793. -----
  5794. The probability density function for `ncf` is:
  5795. .. math::
  5796. f(x, n_1, n_2, \lambda) =
  5797. \exp\left(\frac{\lambda}{2} +
  5798. \lambda n_1 \frac{x}{2(n_1 x + n_2)}
  5799. \right)
  5800. n_1^{n_1/2} n_2^{n_2/2} x^{n_1/2 - 1} \\
  5801. (n_2 + n_1 x)^{-(n_1 + n_2)/2}
  5802. \gamma(n_1/2) \gamma(1 + n_2/2) \\
  5803. \frac{L^{\frac{n_1}{2}-1}_{n_2/2}
  5804. \left(-\lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)}
  5805. {B(n_1/2, n_2/2)
  5806. \gamma\left(\frac{n_1 + n_2}{2}\right)}
  5807. for :math:`n_1, n_2 > 0`, :math:`\lambda \ge 0`. Here :math:`n_1` is the
  5808. degrees of freedom in the numerator, :math:`n_2` the degrees of freedom in
  5809. the denominator, :math:`\lambda` the non-centrality parameter,
  5810. :math:`\gamma` is the logarithm of the Gamma function, :math:`L_n^k` is a
  5811. generalized Laguerre polynomial and :math:`B` is the beta function.
  5812. `ncf` takes ``dfn``, ``dfd`` and ``nc`` as shape parameters. If ``nc=0``,
  5813. the distribution becomes equivalent to the Fisher distribution.
  5814. This distribution uses routines from the Boost Math C++ library for
  5815. the computation of the ``pdf``, ``cdf``, ``ppf``, ``stats``, ``sf`` and
  5816. ``isf`` methods. [1]_
  5817. %(after_notes)s
  5818. References
  5819. ----------
  5820. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5821. %(example)s
  5822. """
  5823. def _argcheck(self, dfn, dfd, nc):
  5824. return (dfn > 0) & (dfd > 0) & (nc >= 0)
  5825. def _shape_info(self):
  5826. idf1 = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
  5827. idf2 = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
  5828. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  5829. return [idf1, idf2, inc]
  5830. def _rvs(self, dfn, dfd, nc, size=None, random_state=None):
  5831. return random_state.noncentral_f(dfn, dfd, nc, size)
  5832. def _pdf(self, x, dfn, dfd, nc):
  5833. return scu._ncf_pdf(x, dfn, dfd, nc)
  5834. def _cdf(self, x, dfn, dfd, nc):
  5835. return sc.ncfdtr(dfn, dfd, nc, x)
  5836. def _ppf(self, q, dfn, dfd, nc):
  5837. with np.errstate(over='ignore'): # see gh-17432
  5838. return sc.ncfdtri(dfn, dfd, nc, q)
  5839. def _sf(self, x, dfn, dfd, nc):
  5840. return scu._ncf_sf(x, dfn, dfd, nc)
  5841. def _isf(self, x, dfn, dfd, nc):
  5842. with np.errstate(over='ignore'): # see gh-17432
  5843. return scu._ncf_isf(x, dfn, dfd, nc)
  5844. # # Produces bogus values as written - maybe it's close, though?
  5845. # def _munp(self, n, dfn, dfd, nc):
  5846. # val = (dfn * 1.0/dfd)**n
  5847. # term = sc.gammaln(n+0.5*dfn) + sc.gammaln(0.5*dfd-n) - sc.gammaln(dfd*0.5)
  5848. # val *= np.exp(-nc / 2.0+term)
  5849. # val *= sc.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
  5850. # return val
  5851. def _stats(self, dfn, dfd, nc, moments='mv'):
  5852. mu = scu._ncf_mean(dfn, dfd, nc)
  5853. mu2 = scu._ncf_variance(dfn, dfd, nc)
  5854. g1 = scu._ncf_skewness(dfn, dfd, nc) if 's' in moments else None
  5855. g2 = scu._ncf_kurtosis_excess( # isn't really excess kurtosis!
  5856. dfn, dfd, nc) - 3 if 'k' in moments else None
  5857. # Mathematica: Kurtosis[NoncentralFRatioDistribution[27, 27, 0.415784417992261]]
  5858. return mu, mu2, g1, g2
  5859. ncf = ncf_gen(a=0.0, name='ncf')
  5860. class t_gen(rv_continuous):
  5861. r"""A Student's t continuous random variable.
  5862. For the noncentral t distribution, see `nct`.
  5863. %(before_notes)s
  5864. See Also
  5865. --------
  5866. nct
  5867. Notes
  5868. -----
  5869. The probability density function for `t` is:
  5870. .. math::
  5871. f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
  5872. {\sqrt{\pi \nu} \Gamma(\nu/2)}
  5873. (1+x^2/\nu)^{-(\nu+1)/2}
  5874. where :math:`x` is a real number and the degrees of freedom parameter
  5875. :math:`\nu` (denoted ``df`` in the implementation) satisfies
  5876. :math:`\nu > 0`. :math:`\Gamma` is the gamma function
  5877. (`scipy.special.gamma`).
  5878. %(after_notes)s
  5879. %(example)s
  5880. """
  5881. def _shape_info(self):
  5882. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  5883. def _rvs(self, df, size=None, random_state=None):
  5884. return random_state.standard_t(df, size=size)
  5885. def _pdf(self, x, df):
  5886. return xpx.apply_where(
  5887. df == np.inf, (x, df),
  5888. lambda x, df: norm._pdf(x),
  5889. lambda x, df: np.exp(self._logpdf(x, df)))
  5890. def _logpdf(self, x, df):
  5891. def t_logpdf(x, df):
  5892. return (np.log(sc.poch(0.5 * df, 0.5))
  5893. - 0.5 * (np.log(df) + np.log(np.pi))
  5894. - (df + 1)/2*np.log1p(x * x/df))
  5895. def norm_logpdf(x, df):
  5896. return norm._logpdf(x)
  5897. return xpx.apply_where(df == np.inf, (x, df), norm_logpdf, t_logpdf)
  5898. def _cdf(self, x, df):
  5899. return sc.stdtr(df, x)
  5900. def _sf(self, x, df):
  5901. return sc.stdtr(df, -x)
  5902. def _ppf(self, q, df):
  5903. return sc.stdtrit(df, q)
  5904. def _isf(self, q, df):
  5905. return -sc.stdtrit(df, q)
  5906. def _stats(self, df):
  5907. # infinite df -> normal distribution (0.0, 1.0, 0.0, 0.0)
  5908. infinite_df = np.isposinf(df)
  5909. mu = np.where(df > 1, 0.0, np.inf)
  5910. condlist = ((df > 1) & (df <= 2),
  5911. (df > 2) & np.isfinite(df),
  5912. infinite_df)
  5913. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  5914. lambda df: df / (df-2.0),
  5915. lambda df: np.broadcast_to(1, df.shape))
  5916. mu2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  5917. g1 = np.where(df > 3, 0.0, np.nan)
  5918. condlist = ((df > 2) & (df <= 4),
  5919. (df > 4) & np.isfinite(df),
  5920. infinite_df)
  5921. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  5922. lambda df: 6.0 / (df-4.0),
  5923. lambda df: np.broadcast_to(0, df.shape))
  5924. g2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  5925. return mu, mu2, g1, g2
  5926. def _entropy(self, df):
  5927. if df == np.inf:
  5928. return norm._entropy()
  5929. def regular(df):
  5930. half = df/2
  5931. half1 = (df + 1)/2
  5932. return (half1*(sc.digamma(half1) - sc.digamma(half))
  5933. + np.log(np.sqrt(df)*sc.beta(half, 0.5)))
  5934. def asymptotic(df):
  5935. # Formula from Wolfram Alpha:
  5936. # "asymptotic expansion (d+1)/2 * (digamma((d+1)/2) - digamma(d/2))
  5937. # + log(sqrt(d) * beta(d/2, 1/2))"
  5938. h = (norm._entropy() + 1/df + (df**-2.)/4 - (df**-3.)/6
  5939. - (df**-4.)/8 + 3/10*(df**-5.) + (df**-6.)/4)
  5940. return h
  5941. return xpx.apply_where(df >= 100, df, asymptotic, regular)
  5942. t = t_gen(name='t')
  5943. class nct_gen(rv_continuous):
  5944. r"""A non-central Student's t continuous random variable.
  5945. %(before_notes)s
  5946. Notes
  5947. -----
  5948. If :math:`Y` is a standard normal random variable and :math:`V` is
  5949. an independent chi-square random variable (`chi2`) with :math:`k` degrees
  5950. of freedom, then
  5951. .. math::
  5952. X = \frac{Y + c}{\sqrt{V/k}}
  5953. has a non-central Student's t distribution on the real line.
  5954. The degrees of freedom parameter :math:`k` (denoted ``df`` in the
  5955. implementation) satisfies :math:`k > 0` and the noncentrality parameter
  5956. :math:`c` (denoted ``nc`` in the implementation) is a real number.
  5957. This distribution uses routines from the Boost Math C++ library for
  5958. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  5959. methods. [1]_
  5960. %(after_notes)s
  5961. References
  5962. ----------
  5963. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5964. %(example)s
  5965. """
  5966. def _argcheck(self, df, nc):
  5967. return (df > 0) & (nc == nc)
  5968. def _shape_info(self):
  5969. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  5970. inc = _ShapeInfo("nc", False, (-np.inf, np.inf), (False, False))
  5971. return [idf, inc]
  5972. def _rvs(self, df, nc, size=None, random_state=None):
  5973. n = norm.rvs(loc=nc, size=size, random_state=random_state)
  5974. c2 = chi2.rvs(df, size=size, random_state=random_state)
  5975. return n * np.sqrt(df) / np.sqrt(c2)
  5976. def _pdf(self, x, df, nc):
  5977. return scu._nct_pdf(x, df, nc)
  5978. def _cdf(self, x, df, nc):
  5979. return sc.nctdtr(df, nc, x)
  5980. def _ppf(self, q, df, nc):
  5981. return sc.nctdtrit(df, nc, q)
  5982. def _sf(self, x, df, nc):
  5983. with np.errstate(over='ignore'): # see gh-17432
  5984. return np.clip(scu._nct_sf(x, df, nc), 0, 1)
  5985. def _isf(self, x, df, nc):
  5986. with np.errstate(over='ignore'): # see gh-17432
  5987. return scu._nct_isf(x, df, nc)
  5988. def _stats(self, df, nc, moments='mv'):
  5989. mu = scu._nct_mean(df, nc)
  5990. mu2 = scu._nct_variance(df, nc)
  5991. g1 = scu._nct_skewness(df, nc) if 's' in moments else None
  5992. g2 = scu._nct_kurtosis_excess(df, nc) if 'k' in moments else None
  5993. return mu, mu2, g1, g2
  5994. nct = nct_gen(name="nct")
  5995. class pareto_gen(rv_continuous):
  5996. r"""A Pareto continuous random variable.
  5997. %(before_notes)s
  5998. Notes
  5999. -----
  6000. The probability density function for `pareto` is:
  6001. .. math::
  6002. f(x, b) = \frac{b}{x^{b+1}}
  6003. for :math:`x \ge 1`, :math:`b > 0`.
  6004. `pareto` takes ``b`` as a shape parameter for :math:`b`.
  6005. %(after_notes)s
  6006. %(example)s
  6007. """
  6008. def _shape_info(self):
  6009. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  6010. def _pdf(self, x, b):
  6011. # pareto.pdf(x, b) = b / x**(b+1)
  6012. return b * x**(-b-1)
  6013. def _cdf(self, x, b):
  6014. return 1 - x**(-b)
  6015. def _ppf(self, q, b):
  6016. return pow(1-q, -1.0/b)
  6017. def _sf(self, x, b):
  6018. return x**(-b)
  6019. def _isf(self, q, b):
  6020. return np.power(q, -1.0 / b)
  6021. def _stats(self, b, moments='mv'):
  6022. mu, mu2, g1, g2 = None, None, None, None
  6023. if 'm' in moments:
  6024. mask = b > 1
  6025. bt = np.extract(mask, b)
  6026. mu = np.full(np.shape(b), fill_value=np.inf)
  6027. np.place(mu, mask, bt / (bt-1.0))
  6028. if 'v' in moments:
  6029. mask = b > 2
  6030. bt = np.extract(mask, b)
  6031. mu2 = np.full(np.shape(b), fill_value=np.inf)
  6032. np.place(mu2, mask, bt / (bt-2.0) / (bt-1.0)**2)
  6033. if 's' in moments:
  6034. mask = b > 3
  6035. bt = np.extract(mask, b)
  6036. g1 = np.full(np.shape(b), fill_value=np.nan)
  6037. vals = 2 * (bt + 1.0) * np.sqrt(bt - 2.0) / ((bt - 3.0) * np.sqrt(bt))
  6038. np.place(g1, mask, vals)
  6039. if 'k' in moments:
  6040. mask = b > 4
  6041. bt = np.extract(mask, b)
  6042. g2 = np.full(np.shape(b), fill_value=np.nan)
  6043. vals = (6.0*np.polyval([1.0, 1.0, -6, -2], bt) /
  6044. np.polyval([1.0, -7.0, 12.0, 0.0], bt))
  6045. np.place(g2, mask, vals)
  6046. return mu, mu2, g1, g2
  6047. def _entropy(self, b):
  6048. return 1 + 1.0/b - np.log(b)
  6049. @_call_super_mom
  6050. @inherit_docstring_from(rv_continuous)
  6051. def fit(self, data, *args, **kwds):
  6052. parameters = _check_fit_input_parameters(self, data, args, kwds)
  6053. data, fshape, floc, fscale = parameters
  6054. # ensure that any fixed parameters don't violate constraints of the
  6055. # distribution before continuing.
  6056. if floc is not None and np.min(data) - floc < (fscale or 0):
  6057. raise FitDataError("pareto", lower=1, upper=np.inf)
  6058. ndata = data.shape[0]
  6059. def get_shape(scale, location):
  6060. # The first-order necessary condition on `shape` can be solved in
  6061. # closed form
  6062. return ndata / np.sum(np.log((data - location) / scale))
  6063. if floc is fscale is None:
  6064. # The support of the distribution is `(x - loc)/scale > 0`.
  6065. # The method of Lagrange multipliers turns this constraint
  6066. # into an equation that can be solved numerically.
  6067. # See gh-12545 for details.
  6068. def dL_dScale(shape, scale):
  6069. # The partial derivative of the log-likelihood function w.r.t.
  6070. # the scale.
  6071. return ndata * shape / scale
  6072. def dL_dLocation(shape, location):
  6073. # The partial derivative of the log-likelihood function w.r.t.
  6074. # the location.
  6075. return (shape + 1) * np.sum(1 / (data - location))
  6076. def fun_to_solve(scale):
  6077. # optimize the scale by setting the partial derivatives
  6078. # w.r.t. to location and scale equal and solving.
  6079. location = np.min(data) - scale
  6080. shape = fshape or get_shape(scale, location)
  6081. return dL_dLocation(shape, location) - dL_dScale(shape, scale)
  6082. def interval_contains_root(lbrack, rbrack):
  6083. # return true if the signs disagree.
  6084. return (np.sign(fun_to_solve(lbrack)) !=
  6085. np.sign(fun_to_solve(rbrack)))
  6086. # set brackets for `root_scalar` to use when optimizing over the
  6087. # scale such that a root is likely between them. Use user supplied
  6088. # guess or default 1.
  6089. brack_start = float(kwds.get('scale', 1))
  6090. lbrack, rbrack = brack_start / 2, brack_start * 2
  6091. # if a root is not between the brackets, iteratively expand them
  6092. # until they include a sign change, checking after each bracket is
  6093. # modified.
  6094. while (not interval_contains_root(lbrack, rbrack)
  6095. and (lbrack > 0 or rbrack < np.inf)):
  6096. lbrack /= 2
  6097. rbrack *= 2
  6098. res = root_scalar(fun_to_solve, bracket=[lbrack, rbrack])
  6099. if res.converged:
  6100. scale = res.root
  6101. loc = np.min(data) - scale
  6102. shape = fshape or get_shape(scale, loc)
  6103. # The Pareto distribution requires that its parameters satisfy
  6104. # the condition `fscale + floc <= min(data)`. However, to
  6105. # avoid numerical issues, we require that `fscale + floc`
  6106. # is strictly less than `min(data)`. If this condition
  6107. # is not satisfied, reduce the scale with `np.nextafter` to
  6108. # ensure that data does not fall outside of the support.
  6109. if not (scale + loc) < np.min(data):
  6110. scale = np.min(data) - loc
  6111. scale = np.nextafter(scale, 0)
  6112. return shape, loc, scale
  6113. else:
  6114. return super().fit(data, **kwds)
  6115. elif floc is None:
  6116. loc = np.min(data) - fscale
  6117. else:
  6118. loc = floc
  6119. # Source: Evans, Hastings, and Peacock (2000), Statistical
  6120. # Distributions, 3rd. Ed., John Wiley and Sons. Page 149.
  6121. scale = fscale or np.min(data) - loc
  6122. shape = fshape or get_shape(scale, loc)
  6123. return shape, loc, scale
  6124. pareto = pareto_gen(a=1.0, name="pareto")
  6125. class lomax_gen(rv_continuous):
  6126. r"""A Lomax (Pareto of the second kind) continuous random variable.
  6127. %(before_notes)s
  6128. Notes
  6129. -----
  6130. The probability density function for `lomax` is:
  6131. .. math::
  6132. f(x, c) = \frac{c}{(1+x)^{c+1}}
  6133. for :math:`x \ge 0`, :math:`c > 0`.
  6134. `lomax` takes ``c`` as a shape parameter for :math:`c`.
  6135. `lomax` is a special case of `pareto` with ``loc=-1.0``.
  6136. %(after_notes)s
  6137. %(example)s
  6138. """
  6139. def _shape_info(self):
  6140. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  6141. def _pdf(self, x, c):
  6142. # lomax.pdf(x, c) = c / (1+x)**(c+1)
  6143. return c*1.0/(1.0+x)**(c+1.0)
  6144. def _logpdf(self, x, c):
  6145. return np.log(c) - (c+1)*sc.log1p(x)
  6146. def _cdf(self, x, c):
  6147. return -sc.expm1(-c*sc.log1p(x))
  6148. def _sf(self, x, c):
  6149. return np.exp(-c*sc.log1p(x))
  6150. def _logsf(self, x, c):
  6151. return -c*sc.log1p(x)
  6152. def _ppf(self, q, c):
  6153. return sc.expm1(-sc.log1p(-q)/c)
  6154. def _isf(self, q, c):
  6155. return q**(-1.0 / c) - 1
  6156. def _stats(self, c):
  6157. mu, mu2, g1, g2 = pareto.stats(c, loc=-1.0, moments='mvsk')
  6158. return mu, mu2, g1, g2
  6159. def _entropy(self, c):
  6160. return 1+1.0/c-np.log(c)
  6161. lomax = lomax_gen(a=0.0, name="lomax")
  6162. class pearson3_gen(rv_continuous):
  6163. r"""A pearson type III continuous random variable.
  6164. %(before_notes)s
  6165. Notes
  6166. -----
  6167. The probability density function for `pearson3` is:
  6168. .. math::
  6169. f(x, \kappa) = \frac{|\beta|}{\Gamma(\alpha)}
  6170. (\beta (x - \zeta))^{\alpha - 1}
  6171. \exp(-\beta (x - \zeta))
  6172. where:
  6173. .. math::
  6174. \beta = \frac{2}{\kappa}
  6175. \alpha = \beta^2 = \frac{4}{\kappa^2}
  6176. \zeta = -\frac{\alpha}{\beta} = -\beta
  6177. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  6178. Pass the skew :math:`\kappa` into `pearson3` as the shape parameter
  6179. ``skew``.
  6180. %(after_notes)s
  6181. %(example)s
  6182. References
  6183. ----------
  6184. R.W. Vogel and D.E. McMartin, "Probability Plot Goodness-of-Fit and
  6185. Skewness Estimation Procedures for the Pearson Type 3 Distribution", Water
  6186. Resources Research, Vol.27, 3149-3158 (1991).
  6187. L.R. Salvosa, "Tables of Pearson's Type III Function", Ann. Math. Statist.,
  6188. Vol.1, 191-198 (1930).
  6189. "Using Modern Computing Tools to Fit the Pearson Type III Distribution to
  6190. Aviation Loads Data", Office of Aviation Research (2003).
  6191. """
  6192. def _preprocess(self, x, skew):
  6193. # The real 'loc' and 'scale' are handled in the calling pdf(...). The
  6194. # local variables 'loc' and 'scale' within pearson3._pdf are set to
  6195. # the defaults just to keep them as part of the equations for
  6196. # documentation.
  6197. loc = 0.0
  6198. scale = 1.0
  6199. # If skew is small, return _norm_pdf. The divide between pearson3
  6200. # and norm was found by brute force and is approximately a skew of
  6201. # 0.000016. No one, I hope, would actually use a skew value even
  6202. # close to this small.
  6203. norm2pearson_transition = 0.000016
  6204. ans, x, skew = np.broadcast_arrays(1.0, x, skew)
  6205. ans = ans.copy()
  6206. # mask is True where skew is small enough to use the normal approx.
  6207. mask = np.absolute(skew) < norm2pearson_transition
  6208. invmask = ~mask
  6209. beta = 2.0 / (skew[invmask] * scale)
  6210. alpha = (scale * beta)**2
  6211. zeta = loc - alpha / beta
  6212. transx = beta * (x[invmask] - zeta)
  6213. return ans, x, transx, mask, invmask, beta, alpha, zeta
  6214. def _argcheck(self, skew):
  6215. # The _argcheck function in rv_continuous only allows positive
  6216. # arguments. The skew argument for pearson3 can be zero (which I want
  6217. # to handle inside pearson3._pdf) or negative. So just return True
  6218. # for all skew args.
  6219. return np.isfinite(skew)
  6220. def _shape_info(self):
  6221. return [_ShapeInfo("skew", False, (-np.inf, np.inf), (False, False))]
  6222. def _stats(self, skew):
  6223. m = 0.0
  6224. v = 1.0
  6225. s = skew
  6226. k = 1.5*skew**2
  6227. return m, v, s, k
  6228. def _pdf(self, x, skew):
  6229. # pearson3.pdf(x, skew) = abs(beta) / gamma(alpha) *
  6230. # (beta * (x - zeta))**(alpha - 1) * exp(-beta*(x - zeta))
  6231. # Do the calculation in _logpdf since helps to limit
  6232. # overflow/underflow problems
  6233. ans = np.exp(self._logpdf(x, skew))
  6234. if ans.ndim == 0:
  6235. if np.isnan(ans):
  6236. return 0.0
  6237. return ans
  6238. ans[np.isnan(ans)] = 0.0
  6239. return ans
  6240. def _logpdf(self, x, skew):
  6241. # PEARSON3 logpdf GAMMA logpdf
  6242. # np.log(abs(beta))
  6243. # + (alpha - 1)*np.log(beta*(x - zeta)) + (a - 1)*np.log(x)
  6244. # - beta*(x - zeta) - x
  6245. # - sc.gammalnalpha) - sc.gammalna)
  6246. ans, x, transx, mask, invmask, beta, alpha, _ = (
  6247. self._preprocess(x, skew))
  6248. ans[mask] = np.log(_norm_pdf(x[mask]))
  6249. # use logpdf instead of _logpdf to fix issue mentioned in gh-12640
  6250. # (_logpdf does not return correct result for alpha = 1)
  6251. ans[invmask] = np.log(abs(beta)) + gamma.logpdf(transx, alpha)
  6252. return ans
  6253. def _cdf(self, x, skew):
  6254. ans, x, transx, mask, invmask, _, alpha, _ = (
  6255. self._preprocess(x, skew))
  6256. ans[mask] = _norm_cdf(x[mask])
  6257. skew = np.broadcast_to(skew, invmask.shape)
  6258. invmask1a = np.logical_and(invmask, skew > 0)
  6259. invmask1b = skew[invmask] > 0
  6260. # use cdf instead of _cdf to fix issue mentioned in gh-12640
  6261. # (_cdf produces NaNs for inputs outside support)
  6262. ans[invmask1a] = gamma.cdf(transx[invmask1b], alpha[invmask1b])
  6263. # The gamma._cdf approach wasn't working with negative skew.
  6264. # Note that multiplying the skew by -1 reflects about x=0.
  6265. # So instead of evaluating the CDF with negative skew at x,
  6266. # evaluate the SF with positive skew at -x.
  6267. invmask2a = np.logical_and(invmask, skew < 0)
  6268. invmask2b = skew[invmask] < 0
  6269. # gamma._sf produces NaNs when transx < 0, so use gamma.sf
  6270. ans[invmask2a] = gamma.sf(transx[invmask2b], alpha[invmask2b])
  6271. return ans
  6272. def _sf(self, x, skew):
  6273. ans, x, transx, mask, invmask, _, alpha, _ = (
  6274. self._preprocess(x, skew))
  6275. ans[mask] = _norm_sf(x[mask])
  6276. skew = np.broadcast_to(skew, invmask.shape)
  6277. invmask1a = np.logical_and(invmask, skew > 0)
  6278. invmask1b = skew[invmask] > 0
  6279. ans[invmask1a] = gamma.sf(transx[invmask1b], alpha[invmask1b])
  6280. invmask2a = np.logical_and(invmask, skew < 0)
  6281. invmask2b = skew[invmask] < 0
  6282. ans[invmask2a] = gamma.cdf(transx[invmask2b], alpha[invmask2b])
  6283. return ans
  6284. def _rvs(self, skew, size=None, random_state=None):
  6285. skew = np.broadcast_to(skew, size)
  6286. ans, _, _, mask, invmask, beta, alpha, zeta = (
  6287. self._preprocess([0], skew))
  6288. nsmall = mask.sum()
  6289. nbig = mask.size - nsmall
  6290. ans[mask] = random_state.standard_normal(nsmall)
  6291. ans[invmask] = random_state.standard_gamma(alpha, nbig)/beta + zeta
  6292. if size == ():
  6293. ans = ans[0]
  6294. return ans
  6295. def _ppf(self, q, skew):
  6296. ans, q, _, mask, invmask, beta, alpha, zeta = (
  6297. self._preprocess(q, skew))
  6298. ans[mask] = _norm_ppf(q[mask])
  6299. q = q[invmask]
  6300. q[beta < 0] = 1 - q[beta < 0] # for negative skew; see gh-17050
  6301. ans[invmask] = sc.gammaincinv(alpha, q)/beta + zeta
  6302. return ans
  6303. @_call_super_mom
  6304. @extend_notes_in_docstring(rv_continuous, notes="""\
  6305. Note that method of moments (`method='MM'`) is not
  6306. available for this distribution.\n\n""")
  6307. def fit(self, data, *args, **kwds):
  6308. if kwds.get("method", None) == 'MM':
  6309. raise NotImplementedError("Fit `method='MM'` is not available for "
  6310. "the Pearson3 distribution. Please try "
  6311. "the default `method='MLE'`.")
  6312. else:
  6313. return super(type(self), self).fit(data, *args, **kwds)
  6314. pearson3 = pearson3_gen(name="pearson3")
  6315. class powerlaw_gen(rv_continuous):
  6316. r"""A power-function continuous random variable.
  6317. %(before_notes)s
  6318. See Also
  6319. --------
  6320. pareto
  6321. Notes
  6322. -----
  6323. The probability density function for `powerlaw` is:
  6324. .. math::
  6325. f(x, a) = a x^{a-1}
  6326. for :math:`0 \le x \le 1`, :math:`a > 0`.
  6327. `powerlaw` takes ``a`` as a shape parameter for :math:`a`.
  6328. %(after_notes)s
  6329. For example, the support of `powerlaw` can be adjusted from the default
  6330. interval ``[0, 1]`` to the interval ``[c, c+d]`` by setting ``loc=c`` and
  6331. ``scale=d``. For a power-law distribution with infinite support, see
  6332. `pareto`. For a power-law distribution described by PDF:
  6333. .. math::
  6334. f(x; a, l, h) = \frac{a}{h^a - l^a} x^{a-1}
  6335. with :math:`a \neq 0` and :math:`0 < l < x < h`, see `truncpareto`.
  6336. `powerlaw` is a special case of `beta` with ``b=1``.
  6337. %(example)s
  6338. """
  6339. def _shape_info(self):
  6340. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  6341. def _pdf(self, x, a):
  6342. # powerlaw.pdf(x, a) = a * x**(a-1)
  6343. return a*x**(a-1.0)
  6344. def _logpdf(self, x, a):
  6345. return np.log(a) + sc.xlogy(a - 1, x)
  6346. def _cdf(self, x, a):
  6347. return x**(a*1.0)
  6348. def _logcdf(self, x, a):
  6349. return a*np.log(x)
  6350. def _ppf(self, q, a):
  6351. return pow(q, 1.0/a)
  6352. def _sf(self, p, a):
  6353. return -sc.powm1(p, a)
  6354. def _munp(self, n, a):
  6355. # The following expression is correct for all real n (provided a > 0).
  6356. return a / (a + n)
  6357. def _stats(self, a):
  6358. return (a / (a + 1.0),
  6359. a / (a + 2.0) / (a + 1.0) ** 2,
  6360. -2.0 * ((a - 1.0) / (a + 3.0)) * np.sqrt((a + 2.0) / a),
  6361. 6 * np.polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))
  6362. def _entropy(self, a):
  6363. return 1 - 1.0/a - np.log(a)
  6364. def _support_mask(self, x, a):
  6365. return (super()._support_mask(x, a)
  6366. & ((x != 0) | (a >= 1)))
  @_call_super_mom
  @extend_notes_in_docstring(rv_continuous, notes="""\
      Notes specifically for ``powerlaw.fit``: If the location is a free
      parameter and the value returned for the shape parameter is less than
      one, the true maximum likelihood approaches infinity. This causes
      numerical difficulties, and the resulting estimates are approximate.
      \n\n""")
  def fit(self, data, *args, **kwds):
      # Fit `powerlaw` to `data`, using closed-form expressions where they
      # exist and a scalar root finder otherwise. Returns a tuple
      # ``(shape, loc, scale)``; falls back to the parent class' `fit` when
      # ``superfit=True`` is passed, when all data values are equal, or when
      # neither analytical candidate is self-consistent (see the end).
      #
      # Summary of the strategy:
      #
      # 1) If the scale and location are fixed, return the shape according
      #    to a formula.
      #
      # 2) If the scale is fixed, there are two possibilities for the other
      #    parameters - one corresponding with shape less than one, and
      #    another with shape greater than one. Calculate both, and return
      #    whichever has the better log-likelihood.
      #
      # At this point, the scale is known to be free.
      #
      # 3) If the location is fixed, return the scale and shape according to
      #    formulas (or, if the shape is fixed, the fixed shape).
      #
      # At this point, the location and scale are both free. There are
      # separate equations depending on whether the shape is less than one or
      # greater than one.
      #
      # 4a) If the shape is less than one, there are formulas for shape,
      #     location, and scale.
      # 4b) If the shape is greater than one, there are formulas for shape
      #     and scale, but there is a condition for location to be solved
      #     numerically.
      #
      # If the shape is fixed and less than one, we use 4a.
      # If the shape is fixed and greater than one, we use 4b.
      # If the shape is also free, we calculate fits using both 4a and 4b
      # and choose the one that results in a better log-likelihood.
      #
      # In many cases, `np.nextafter` is used to avoid numerical issues.
      if kwds.pop('superfit', False):
          return super().fit(data, *args, **kwds)

      # With a single distinct data value the formulas below are not used;
      # defer to the generic implementation.
      if len(np.unique(data)) == 1:
          return super().fit(data, *args, **kwds)

      data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
                                                               args, kwds)
      # Objective used to compare candidate fits in case 2).
      # NOTE(review): presumably the penalized negative log-likelihood
      # built by `_reduce_func` - confirm against its definition.
      penalized_nllf_args = [data, (self._fitstart(data),)]
      penalized_nllf = self._reduce_func(penalized_nllf_args, {})[1]

      # ensure that any fixed parameters don't violate constraints of the
      # distribution before continuing. The support of the distribution
      # is `0 < (x - loc)/scale < 1`.
      if floc is not None:
          if not data.min() > floc:
              raise FitDataError('powerlaw', 0, 1)
          if fscale is not None and not data.max() <= floc + fscale:
              raise FitDataError('powerlaw', 0, 1)

      if fscale is not None:
          if fscale <= 0:
              raise ValueError("Negative or zero `fscale` is outside the "
                               "range allowed by the distribution.")
          if fscale <= np.ptp(data):
              msg = "`fscale` must be greater than the range of data."
              raise ValueError(msg)

      def get_shape(data, loc, scale):
          # The first-order necessary condition on `shape` can be solved in
          # closed form. It can be used no matter the assumption of the
          # value of the shape.
          N = len(data)
          return - N / (np.sum(np.log(data - loc)) - N*np.log(scale))

      def get_scale(data, loc):
          # analytical solution for `scale` based on the location.
          # It can be used no matter the assumption of the value of the
          # shape.
          return data.max() - loc

      # 1) The location and scale are both fixed. Analytically determine the
      # shape.
      if fscale is not None and floc is not None:
          return get_shape(data, floc, fscale), floc, fscale

      # 2) The scale is fixed. There are two possibilities for the other
      # parameters. Choose the option with better log-likelihood.
      # (The candidate with the smaller `penalized_nllf` value is returned.)
      if fscale is not None:
          # using `data.min()` as the optimal location
          # (nudged one float below it so that every datum exceeds `loc`)
          loc_lt1 = np.nextafter(data.min(), -np.inf)
          shape_lt1 = fshape or get_shape(data, loc_lt1, fscale)
          ll_lt1 = penalized_nllf((shape_lt1, loc_lt1, fscale), data)

          # using `data.max() - scale` as the optimal location
          # (nudged one float above it)
          loc_gt1 = np.nextafter(data.max() - fscale, np.inf)
          shape_gt1 = fshape or get_shape(data, loc_gt1, fscale)
          ll_gt1 = penalized_nllf((shape_gt1, loc_gt1, fscale), data)

          if ll_lt1 < ll_gt1:
              return shape_lt1, loc_lt1, fscale
          else:
              return shape_gt1, loc_gt1, fscale

      # 3) The location is fixed. Return the analytical scale and the
      # analytical (or fixed) shape.
      if floc is not None:
          scale = get_scale(data, floc)
          shape = fshape or get_shape(data, floc, scale)
          return shape, floc, scale

      # 4) Location and scale are both free
      # 4a) Use formulas that assume `shape <= 1`.

      def fit_loc_scale_w_shape_lt_1():
          loc = np.nextafter(data.min(), -np.inf)
          # Replace a location whose magnitude is below the smallest normal
          # float (`finfo.tiny`) by +/- `tiny`, keeping its sign.
          if np.abs(loc) < np.finfo(loc.dtype).tiny:
              loc = np.sign(loc) * np.finfo(loc.dtype).tiny
          scale = np.nextafter(get_scale(data, loc), np.inf)
          shape = fshape or get_shape(data, loc, scale)
          return shape, loc, scale

      # 4b) Fit under the assumption that `shape > 1`. The support
      # of the distribution is `(x - loc)/scale <= 1`. The method of Lagrange
      # multipliers turns this constraint into the condition that
      # dL_dScale - dL_dLocation must be zero, which is solved numerically.
      # (Alternatively, substitute the constraint into the objective
      # function before deriving the likelihood equation for location.)

      def dL_dScale(data, shape, scale):
          # The partial derivative of the log-likelihood function w.r.t.
          # the scale.
          return -data.shape[0] * shape / scale

      def dL_dLocation(data, shape, loc):
          # The partial derivative of the log-likelihood function w.r.t.
          # the location.
          return (shape - 1) * np.sum(1 / (loc - data))  # -1/(data-loc)

      def dL_dLocation_star(loc):
          # The derivative of the log-likelihood function w.r.t.
          # the location, given optimal shape and scale
          scale = np.nextafter(get_scale(data, loc), -np.inf)
          shape = fshape or get_shape(data, loc, scale)
          return dL_dLocation(data, shape, loc)

      def fun_to_solve(loc):
          # optimize the location by setting the partial derivatives
          # w.r.t. to location and scale equal and solving.
          scale = np.nextafter(get_scale(data, loc), -np.inf)
          shape = fshape or get_shape(data, loc, scale)
          return (dL_dScale(data, shape, scale)
                  - dL_dLocation(data, shape, loc))

      def fit_loc_scale_w_shape_gt_1():
          # set brackets for `root_scalar` to use when optimizing over the
          # location such that a root is likely between them.
          rbrack = np.nextafter(data.min(), -np.inf)

          # if the sign of `dL_dLocation_star` is positive at rbrack,
          # we're not going to find the root we're looking for
          # (so move rbrack left of `data.min()` by a distance that doubles
          # on every iteration until the sign is no longer positive)
          delta = (data.min() - rbrack)
          while dL_dLocation_star(rbrack) > 0:
              rbrack = data.min() - delta
              delta *= 2

          def interval_contains_root(lbrack, rbrack):
              # Check if the interval (lbrack, rbrack) contains the root.
              return (np.sign(fun_to_solve(lbrack))
                      != np.sign(fun_to_solve(rbrack)))

          lbrack = rbrack - 1

          # if the sign doesn't change between the brackets, move the left
          # bracket until it does. (The right bracket remains fixed at the
          # maximum permissible value.)
          # The offset `i` doubles each pass; the loop also stops if
          # lbrack reaches -inf.
          i = 1.0
          while (not interval_contains_root(lbrack, rbrack)
                 and lbrack != -np.inf):
              lbrack = (data.min() - i)
              i *= 2

          root = optimize.root_scalar(fun_to_solve, bracket=(lbrack, rbrack))

          # Nudge the solution outward by one float on each side: `loc` down
          # and `scale` up.
          loc = np.nextafter(root.root, -np.inf)
          scale = np.nextafter(get_scale(data, loc), np.inf)
          shape = fshape or get_shape(data, loc, scale)
          return shape, loc, scale

      # Shape is fixed - choose 4a or 4b accordingly.
      if fshape is not None and fshape <= 1:
          return fit_loc_scale_w_shape_lt_1()
      elif fshape is not None and fshape > 1:
          return fit_loc_scale_w_shape_gt_1()

      # Shape is free
      # Compute both candidates and compare them with `self.nnlf`; the
      # candidate with the smaller value wins, but only if its fitted shape
      # is consistent with the assumption it was derived under.
      fit_shape_lt1 = fit_loc_scale_w_shape_lt_1()
      ll_lt1 = self.nnlf(fit_shape_lt1, data)

      fit_shape_gt1 = fit_loc_scale_w_shape_gt_1()
      ll_gt1 = self.nnlf(fit_shape_gt1, data)

      if ll_lt1 <= ll_gt1 and fit_shape_lt1[0] <= 1:
          return fit_shape_lt1
      elif ll_lt1 > ll_gt1 and fit_shape_gt1[0] > 1:
          return fit_shape_gt1
      else:
          # Neither candidate is self-consistent: use the generic fit.
          return super().fit(data, *args, **kwds)
  # Module-level `powerlaw` distribution object, created with a=0.0 and b=1.0.
  powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw")
  class powerlognorm_gen(rv_continuous):
      r"""A power log-normal continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `powerlognorm` is:

      .. math::

          f(x, c, s) = \frac{c}{x s} \phi(\log(x)/s)
                       (\Phi(-\log(x)/s))^{c-1}

      where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
      and :math:`x > 0`, :math:`s, c > 0`.

      `powerlognorm` takes :math:`c` and :math:`s` as shape parameters.

      %(after_notes)s

      %(example)s

      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          # Two shape parameters, reported in the order (c, s).
          return [
              _ShapeInfo("c", False, (0, np.inf), (False, False)),
              _ShapeInfo("s", False, (0, np.inf), (False, False)),
          ]

      def _pdf(self, x, c, s):
          # The density is the exponential of the log-density.
          log_density = self._logpdf(x, c, s)
          return np.exp(log_density)

      def _logpdf(self, x, c, s):
          # log f = log(c) - log(x) - log(s)
          #         + log(phi(log(x)/s)) + (c - 1)*log(Phi(-log(x)/s))
          normalization = np.log(c) - np.log(x) - np.log(s)
          gauss_term = _norm_logpdf(np.log(x) / s)
          tail_term = _norm_logcdf(-np.log(x) / s) * (c - 1.)
          return normalization + gauss_term + tail_term

      def _cdf(self, x, c, s):
          # cdf = 1 - sf = -expm1(log(sf))
          log_sf = self._logsf(x, c, s)
          return -sc.expm1(log_sf)

      def _ppf(self, q, c, s):
          # The quantile function is the inverse survival function at 1 - q.
          upper_tail = 1 - q
          return self._isf(upper_tail, c, s)

      def _sf(self, x, c, s):
          log_sf = self._logsf(x, c, s)
          return np.exp(log_sf)

      def _logsf(self, x, c, s):
          # log(sf) = c * log(Phi(-log(x)/s))
          log_tail = _norm_logcdf(-np.log(x) / s)
          return log_tail * c

      def _isf(self, q, c, s):
          # Invert sf = Phi(-log(x)/s)**c for x.
          z = _norm_ppf(q**(1/c))
          return np.exp(-z * s)


  powerlognorm = powerlognorm_gen(a=0.0, name="powerlognorm")
  class powernorm_gen(rv_continuous):
      r"""A power normal continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `powernorm` is:

      .. math::

          f(x, c) = c \phi(x) (\Phi(-x))^{c-1}

      where :math:`\phi` is the normal pdf, :math:`\Phi` is the normal cdf,
      :math:`x` is any real, and :math:`c > 0` [1]_.

      `powernorm` takes ``c`` as a shape parameter for :math:`c`.

      %(after_notes)s

      References
      ----------
      .. [1] NIST Engineering Statistics Handbook, Section 1.3.6.6.13,
             https://www.itl.nist.gov/div898/handbook//eda/section3/eda366d.htm

      %(example)s

      """
      def _shape_info(self):
          c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
          return [c_info]

      def _pdf(self, x, c):
          # powernorm.pdf(x, c) = c * phi(x) * (Phi(-x))**(c-1)
          upper_tail = _norm_cdf(-x)
          return c*_norm_pdf(x) * (upper_tail**(c-1.0))

      def _logpdf(self, x, c):
          # log(c) + log(phi(x)) + (c - 1)*log(Phi(-x))
          log_kernel = np.log(c) + _norm_logpdf(x)
          return log_kernel + (c-1)*_norm_logcdf(-x)

      def _cdf(self, x, c):
          # cdf = 1 - sf = -expm1(log(sf))
          log_sf = self._logsf(x, c)
          return -sc.expm1(log_sf)

      def _ppf(self, q, c):
          # Invert 1 - Phi(-x)**c = q for x.
          tail_prob = pow(1.0 - q, 1.0 / c)
          return -_norm_ppf(tail_prob)

      def _sf(self, x, c):
          log_sf = self._logsf(x, c)
          return np.exp(log_sf)

      def _logsf(self, x, c):
          # log(sf) = c * log(Phi(-x))
          log_tail = _norm_logcdf(-x)
          return c * log_tail

      def _isf(self, q, c):
          # q**(1/c) evaluated as exp(log(q)/c).
          tail_prob = np.exp(np.log(q) / c)
          return -_norm_ppf(tail_prob)


  powernorm = powernorm_gen(name='powernorm')
  class rdist_gen(rv_continuous):
      r"""An R-distributed (symmetric beta) continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `rdist` is:

      .. math::

          f(x, c) = \frac{(1-x^2)^{c/2-1}}{B(1/2, c/2)}

      for :math:`-1 \le x \le 1`, :math:`c > 0`. `rdist` is also called the
      symmetric beta distribution: if B has a `beta` distribution with
      parameters (c/2, c/2), then X = 2*B - 1 follows a R-distribution with
      parameter c.

      `rdist` takes ``c`` as a shape parameter for :math:`c`.

      This distribution includes the following distribution kernels as
      special cases::

          c = 2:  uniform
          c = 3:  `semicircular`
          c = 4:  Epanechnikov (parabolic)
          c = 6:  quartic (biweight)
          c = 8:  triweight

      %(after_notes)s

      %(example)s

      """
      def _shape_info(self):
          c_info = _ShapeInfo("c", False, (0, np.inf), (False, False))
          return [c_info]

      # The methods below map x in [-1, 1] to (x + 1)/2 in [0, 1] and delegate
      # to the `beta` distribution with both shape parameters equal to c/2.
      def _pdf(self, x, c):
          log_density = self._logpdf(x, c)
          return np.exp(log_density)

      def _logpdf(self, x, c):
          # The -log(2) term is the Jacobian of the map x -> (x + 1)/2.
          beta_logpdf = beta._logpdf((x + 1)/2, c/2, c/2)
          return -np.log(2) + beta_logpdf

      def _cdf(self, x, c):
          unit_x = (x + 1)/2
          return beta._cdf(unit_x, c/2, c/2)

      def _sf(self, x, c):
          unit_x = (x + 1)/2
          return beta._sf(unit_x, c/2, c/2)

      def _ppf(self, q, c):
          # Map the beta quantile from [0, 1] back to [-1, 1].
          beta_quantile = beta._ppf(q, c/2, c/2)
          return 2*beta_quantile - 1

      def _rvs(self, c, size=None, random_state=None):
          # Draw beta(c/2, c/2) variates and rescale them to [-1, 1].
          beta_draws = random_state.beta(c/2, c/2, size)
          return 2 * beta_draws - 1

      def _munp(self, n, c):
          # The factor (1 - n % 2) zeroes the result when n % 2 == 1.
          parity_factor = 1 - (n % 2)
          numerator = parity_factor * sc.beta((n + 1.0) / 2, c / 2.0)
          denominator = sc.beta(1. / 2, c / 2.)
          return numerator / denominator


  rdist = rdist_gen(a=-1.0, b=1.0, name="rdist")
  class rayleigh_gen(rv_continuous):
      r"""A Rayleigh continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `rayleigh` is:

      .. math::

          f(x) = x \exp(-x^2/2)

      for :math:`x \ge 0`.

      `rayleigh` is a special case of `chi` with ``df=2``.

      %(after_notes)s

      %(example)s

      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          # The distribution has no shape parameters.
          return []

      def _rvs(self, size=None, random_state=None):
          # Sample through the `chi` distribution with 2 degrees of freedom.
          return chi.rvs(2, size=size, random_state=random_state)

      def _pdf(self, r):
          # rayleigh.pdf(r) = r * exp(-r**2/2)
          return np.exp(self._logpdf(r))

      def _logpdf(self, r):
          return np.log(r) - 0.5 * r * r

      def _cdf(self, r):
          # 1 - exp(-r**2/2), written with expm1.
          return -sc.expm1(-0.5 * r**2)

      def _ppf(self, q):
          # Inverse of the cdf: sqrt(-2*log(1 - q)), written with log1p.
          return np.sqrt(-2 * sc.log1p(-q))

      def _sf(self, r):
          return np.exp(self._logsf(r))

      def _logsf(self, r):
          return -0.5 * r * r

      def _isf(self, q):
          return np.sqrt(-2 * np.log(q))

      def _stats(self):
          # Closed-form values, returned as a 4-tuple.
          val = 4 - np.pi
          return (np.sqrt(np.pi/2),
                  val/2,
                  2*(np.pi-3)*np.sqrt(np.pi)/val**1.5,
                  6*np.pi/val-16/val**2)

      def _entropy(self):
          return _EULER/2.0 + 1 - 0.5*np.log(2)

      @_call_super_mom
      @extend_notes_in_docstring(rv_continuous, notes="""\
          Notes specifically for ``rayleigh.fit``: If the location is fixed with
          the `floc` parameter, this method uses an analytical formula to find
          the scale.  Otherwise, this function uses a numerical root finder on
          the first order conditions of the log-likelihood function to find the
          MLE.  The root finder brackets the solution automatically, so initial
          guesses (`loc`, `scale`) and any other parameters for the optimizer
          are ignored.\n\n""")
      def fit(self, data, *args, **kwds):
          # Passing ``superfit=True`` selects the parent class' `fit`.
          if kwds.pop('superfit', False):
              return super().fit(data, *args, **kwds)
          data, floc, fscale = _check_fit_input_parameters(self, data,
                                                           args, kwds)

          def scale_mle(loc):
              # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
              # and Peacock (2000), Page 175
              return (np.sum((data - loc) ** 2) / (2 * len(data))) ** .5

          def loc_mle(loc):
              # This implicit equation for `loc` is used when
              # both `loc` and `scale` are free.
              xm = data - loc
              s1 = xm.sum()
              s2 = (xm**2).sum()
              s3 = (1/xm).sum()
              return s1 - s2/(2*len(data))*s3

          def loc_mle_scale_fixed(loc, scale=fscale):
              # This implicit equation for `loc` is used when
              # `scale` is fixed but `loc` is not.
              xm = data - loc
              return xm.sum() - scale**2 * (1/xm).sum()

          if floc is not None:
              # `loc` is fixed, analytically determine `scale`.
              if np.any(data - floc <= 0):
                  # Every observation must lie strictly above `floc`, i.e.
                  # the lower bound on (x - loc)/scale is 0.
                  raise FitDataError("rayleigh", lower=0, upper=np.inf)
              else:
                  return floc, scale_mle(floc)

          # `loc` is free: solve its first-order condition with a bracketing
          # root finder. No initial guess is needed because the root is
          # bracketed between `lbrack` and the largest float below min(data).
          fun = loc_mle if fscale is None else loc_mle_scale_fixed
          rbrack = np.nextafter(np.min(data), -np.inf)
          lbrack = _get_left_bracket(fun, rbrack)
          res = optimize.root_scalar(fun, bracket=(lbrack, rbrack))
          if not res.converged:
              raise FitSolverError(res.flag)
          loc = res.root
          scale = fscale or scale_mle(loc)
          return loc, scale


  rayleigh = rayleigh_gen(a=0.0, name="rayleigh")
  class reciprocal_gen(rv_continuous):
      r"""A loguniform or reciprocal continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for this class is:

      .. math::

          f(x, a, b) = \frac{1}{x \log(b/a)}

      for :math:`a \le x \le b`, :math:`b > a > 0`. This class takes
      :math:`a` and :math:`b` as shape parameters.

      %(after_notes)s

      %(example)s

      This doesn't show the equal probability of ``0.01``, ``0.1`` and
      ``1``. This is best when the x-axis is log-scaled:

      >>> import numpy as np
      >>> import matplotlib.pyplot as plt
      >>> fig, ax = plt.subplots(1, 1)
      >>> ax.hist(np.log10(r))
      >>> ax.set_ylabel("Frequency")
      >>> ax.set_xlabel("Value of random variable")
      >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
      >>> ticks = ["$10^{{ {} }}$".format(i) for i in [-2, -1, 0]]
      >>> ax.set_xticklabels(ticks)  # doctest: +SKIP
      >>> plt.show()

      This random variable will be log-uniform regardless of the base chosen for
      ``a`` and ``b``. Let's specify with base ``2`` instead:

      >>> rvs = %(name)s(2**-2, 2**0).rvs(size=1000)

      Values of ``1/4``, ``1/2`` and ``1`` are equally likely with this random
      variable. Here's the histogram:

      >>> fig, ax = plt.subplots(1, 1)
      >>> ax.hist(np.log2(rvs))
      >>> ax.set_ylabel("Frequency")
      >>> ax.set_xlabel("Value of random variable")
      >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
      >>> ticks = ["$2^{{ {} }}$".format(i) for i in [-2, -1, 0]]
      >>> ax.set_xticklabels(ticks)  # doctest: +SKIP
      >>> plt.show()

      """
      def _argcheck(self, a, b):
          # Valid shapes satisfy 0 < a < b.
          a_positive = a > 0
          b_above_a = b > a
          return a_positive & b_above_a

      def _shape_info(self):
          # Two shape parameters, reported in the order (a, b).
          return [
              _ShapeInfo("a", False, (0, np.inf), (False, False)),
              _ShapeInfo("b", False, (0, np.inf), (False, False)),
          ]

      def _fitstart(self, data):
          if isinstance(data, CensoredData):
              data = data._uncensor()
          # Reasonable, since support is [a, b]
          shape_guess = (np.min(data), np.max(data))
          return super()._fitstart(data, args=shape_guess)

      def _get_support(self, a, b):
          # The support endpoints are the shape parameters themselves.
          return a, b

      def _pdf(self, x, a, b):
          # reciprocal.pdf(x, a, b) = 1 / (x*(log(b) - log(a)))
          log_density = self._logpdf(x, a, b)
          return np.exp(log_density)

      def _logpdf(self, x, a, b):
          log_width = np.log(b) - np.log(a)
          return -np.log(x) - np.log(log_width)

      def _cdf(self, x, a, b):
          # Fraction of the log-interval [log(a), log(b)] lying below log(x).
          log_offset = np.log(x)-np.log(a)
          log_width = np.log(b) - np.log(a)
          return log_offset / log_width

      def _ppf(self, q, a, b):
          # Interpolate linearly between log(a) and log(b), then exponentiate.
          log_width = np.log(b) - np.log(a)
          return np.exp(np.log(a) + q*log_width)

      def _munp(self, n, a, b):
          # The zeroth moment is handled separately because the general
          # expression below divides by n.
          if n == 0:
              return 1.0
          prefactor = 1 / (np.log(b) - np.log(a)) / n
          log_power_diff = _log_diff(n * np.log(b), n*np.log(a))
          power_diff = np.real(np.exp(log_power_diff))
          return prefactor * power_diff

      def _entropy(self, a, b):
          mean_log = 0.5*(np.log(a) + np.log(b))
          log_width = np.log(b) - np.log(a)
          return mean_log + np.log(log_width)

      fit_note = """\
          `loguniform`/`reciprocal` is over-parameterized. `fit` automatically
           fixes `scale` to 1 unless `fscale` is provided by the user.\n\n"""

      @extend_notes_in_docstring(rv_continuous, notes=fit_note)
      def fit(self, data, *args, **kwds):
          # Fix the scale at 1 unless the caller supplied `fscale` explicitly.
          fixed_scale = kwds.pop('fscale', 1)
          return super().fit(data, *args, fscale=fixed_scale, **kwds)

      # Details related to the decision of not defining
      # the survival function for this distribution can be
      # found in the PR: https://github.com/scipy/scipy/pull/18614


  # Two instances of the same generator class, differing only in `name`.
  loguniform = reciprocal_gen(name="loguniform")
  reciprocal = reciprocal_gen(name="reciprocal")
  loguniform._support = ('a', 'b')
  reciprocal._support = ('a', 'b')
  class rice_gen(rv_continuous):
      r"""A Rice continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `rice` is:

      .. math::

          f(x, b) = x \exp(- \frac{x^2 + b^2}{2}) I_0(x b)

      for :math:`x >= 0`, :math:`b > 0`. :math:`I_0` is the modified Bessel
      function of order zero (`scipy.special.i0`).

      `rice` takes ``b`` as a shape parameter for :math:`b`.

      %(after_notes)s

      The Rice distribution describes the length, :math:`r`, of a 2-D vector with
      components :math:`(U+u, V+v)`, where :math:`U, V` are constant, :math:`u,
      v` are independent Gaussian random variables with standard deviation
      :math:`s`.  Let :math:`R = \sqrt{U^2 + V^2}`. Then the pdf of :math:`r` is
      ``rice.pdf(x, R/s, scale=s)``.

      %(example)s

      """
      def _argcheck(self, b):
          # b == 0 is accepted as a valid shape value.
          return b >= 0

      def _shape_info(self):
          b_info = _ShapeInfo("b", False, (0, np.inf), (True, False))
          return [b_info]

      def _rvs(self, b, size=None, random_state=None):
          # https://en.wikipedia.org/wiki/Rice_distribution
          # Two normal components, each shifted by b/sqrt(2); the variate is
          # the Euclidean length of the resulting 2-vector.
          normals = random_state.standard_normal(size=(2,) + size)
          t = b/np.sqrt(2) + normals
          squared_length = (t*t).sum(axis=0)
          return np.sqrt(squared_length)

      def _cdf(self, x, b):
          # Evaluated with `chndtr` at x**2 with 2 degrees of freedom and
          # noncentrality b**2.
          x_sq = np.square(x)
          b_sq = np.square(b)
          return sc.chndtr(x_sq, 2, b_sq)

      def _ppf(self, q, b):
          # Invert `_cdf`: solve for x**2 with `chndtrix`, then take the root.
          x_sq = sc.chndtrix(q, 2, np.square(b))
          return np.sqrt(x_sq)

      def _pdf(self, x, b):
          # rice.pdf(x, b) = x * exp(-(x**2+b**2)/2) * I[0](x*b)
          #
          # We use (x**2 + b**2)/2 = ((x-b)**2)/2 + xb.
          # The factor of np.exp(-xb) is then included in the i0e function
          # in place of the modified Bessel function, i0, improving
          # numerical stability for large values of xb.
          gaussian_factor = np.exp(-(x-b)*(x-b)/2.0)
          return x * gaussian_factor * sc.i0e(x*b)

      def _munp(self, n, b):
          half_n = n/2.0
          gamma_arg = 1 + half_n
          half_b_sq = b*b/2.0
          return (2.0**(half_n) * np.exp(-half_b_sq) * sc.gamma(gamma_arg) *
                  sc.hyp1f1(gamma_arg, 1, half_b_sq))


  rice = rice_gen(a=0.0, name="rice")
  class irwinhall_gen(rv_continuous):
      r"""An Irwin-Hall (Uniform Sum) continuous random variable.

      An `Irwin-Hall <https://en.wikipedia.org/wiki/Irwin-Hall_distribution/>`_
      continuous random variable is the sum of :math:`n` independent
      standard uniform random variables [1]_ [2]_.

      %(before_notes)s

      Notes
      -----
      Applications include `Rao's Spacing Test
      <https://jammalam.faculty.pstat.ucsb.edu/html/favorite/test.htm>`_,
      a more powerful alternative to the Rayleigh test
      when the data are not unimodal, and radar [3]_.

      Conveniently, the pdf and cdf are the :math:`n`-fold convolution of
      the ones for the standard uniform distribution, which is also the
      definition of the cardinal B-splines of degree :math:`n-1`
      having knots evenly spaced from :math:`1` to :math:`n` [4]_ [5]_.

      The Bates distribution, which represents the *mean* of statistically
      independent, uniformly distributed random variables, is simply the
      Irwin-Hall distribution scaled by :math:`1/n`. For example, the frozen
      distribution ``bates = irwinhall(10, scale=1/10)`` represents the
      distribution of the mean of 10 uniformly distributed random variables.

      %(after_notes)s

      References
      ----------
      .. [1] P. Hall, "The distribution of means for samples of size N drawn
             from a population in which the variate takes values between 0 and 1,
             all such values being equally probable",
             Biometrika, Volume 19, Issue 3-4, December 1927, Pages 240-244,
             :doi:`10.1093/biomet/19.3-4.240`.
      .. [2] J. O. Irwin, "On the frequency distribution of the means of samples
             from a population having any law of frequency with finite moments,
             with special reference to Pearson's Type II",
             Biometrika, Volume 19, Issue 3-4, December 1927, Pages 225-239,
             :doi:`10.1093/biomet/19.3-4.225`.
      .. [3] K. Buchanan, T. Adeyemi, C. Flores-Molina, S. Wheeland and D. Overturf,
             "Sidelobe behavior and bandwidth characteristics
             of distributed antenna arrays,"
             2018 United States National Committee of
             URSI National Radio Science Meeting (USNC-URSI NRSM),
             Boulder, CO, USA, 2018, pp. 1-2.
             https://www.usnc-ursi-archive.org/nrsm/2018/papers/B15-9.pdf.
      .. [4] Amos Ron, "Lecture 1: Cardinal B-splines and convolution operators", p. 1
             https://pages.cs.wisc.edu/~deboor/887/lec1new.pdf.
      .. [5] Trefethen, N. (2012, July). B-splines and convolution. Chebfun.
             Retrieved April 30, 2024, from http://www.chebfun.org/examples/approx/BSplineConv.html.

      %(example)s
      """  # noqa: E501

      @replace_notes_in_docstring(rv_continuous, notes="""\
          Raises a ``NotImplementedError`` for the Irwin-Hall distribution because
          the generic `fit` implementation is unreliable and no custom implementation
          is available. Consider using `scipy.stats.fit`.\n\n""")
      def fit(self, data, *args, **kwds):
          # Always raises: fitting is deliberately unsupported here.
          fit_notes = ("The generic `fit` implementation is unreliable for this "
                       "distribution, and no custom implementation is available. "
                       "Consider using `scipy.stats.fit`.")
          raise NotImplementedError(fit_notes)

      def _argcheck(self, n):
          # `n` must be positive, integral and real.
          return (n > 0) & _isintegral(n) & np.isrealobj(n)

      def _get_support(self, n):
          # The support is [0, n].
          return 0, n

      def _shape_info(self):
          return [_ShapeInfo("n", True, (1, np.inf), (True, False))]

      def _munp(self, order, n):
          # see https://link.springer.com/content/pdf/10.1007/s10959-020-01050-9.pdf
          # page 640, with m=n, j=n+order
          def vmunp(order, n):
              n = np.asarray(n, dtype=np.int64)
              return (sc.stirling2(n+order, n, exact=True)
                      / sc.comb(n+order, n, exact=True))

          # exact rationals, but we convert to float anyway
          return np.vectorize(vmunp, otypes=[np.float64])(order, n)

      @staticmethod
      def _cardbspl(n):
          # B-spline basis element on the integer knots 0, 1, ..., n.
          t = np.arange(n+1)
          return BSpline.basis_element(t)

      def _pdf(self, x, n):
          # The pdf is the B-spline itself, evaluated element by element.
          def vpdf(x, n):
              return self._cardbspl(n)(x)
          return np.vectorize(vpdf, otypes=[np.float64])(x, n)

      def _cdf(self, x, n):
          # The cdf is the antiderivative of the B-spline.
          def vcdf(x, n):
              return self._cardbspl(n).antiderivative()(x)
          return np.vectorize(vcdf, otypes=[np.float64])(x, n)

      def _sf(self, x, n):
          # The survival function evaluates the same antiderivative at the
          # reflected point n - x.
          def vsf(x, n):
              return self._cardbspl(n).antiderivative()(n-x)
          return np.vectorize(vsf, otypes=[np.float64])(x, n)

      def _rvs(self, n, size=None, random_state=None, *args):
          @_vectorize_rvs_over_shapes
          def _rvs1(n, size=None, random_state=None):
              # Draw n uniform variates per output element and sum them over
              # the leading axis.
              n = np.floor(n).astype(int)
              usize = (n,) if size is None else (n, *size)
              return random_state.uniform(size=usize).sum(axis=0)
          return _rvs1(n, size=size, random_state=random_state)

      def _stats(self, n):
          # mgf = ((exp(t) - 1)/t)**n
          # m'th derivative follows from the generalized Leibniz rule
          # Moments follow directly from the definition as the sum of n iid unif(0,1)
          # and the summation rules for moments of a sum of iid random variables
          # E(IH((n))) = n*E(U(0,1)) = n/2
          # Var(IH((n))) = n*Var(U(0,1)) = n/12
          # Skew(IH((n))) = Skew(U(0,1))/sqrt(n) = 0
          # Kurt(IH((n))) = Kurt(U(0,1))/n = -6/(5*n) -- Fisher's excess kurtosis
          # See e.g. https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
          return n/2, n/12, 0, -6/(5*n)


  irwinhall = irwinhall_gen(name="irwinhall")
  irwinhall._support = (0.0, 'n')
  class recipinvgauss_gen(rv_continuous):
      r"""A reciprocal inverse Gaussian continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `recipinvgauss` is:

      .. math::

          f(x, \mu) = \frac{1}{\sqrt{2\pi x}}
                      \exp\left(\frac{-(1-\mu x)^2}{2\mu^2x}\right)

      for :math:`x \ge 0`.

      `recipinvgauss` takes ``mu`` as a shape parameter for :math:`\mu`.

      %(after_notes)s

      %(example)s

      """
      def _shape_info(self):
          mu_info = _ShapeInfo("mu", False, (0, np.inf), (False, False))
          return [mu_info]

      def _pdf(self, x, mu):
          # recipinvgauss.pdf(x, mu) =
          #                     1/sqrt(2*pi*x) * exp(-(1-mu*x)**2/(2*x*mu**2))
          log_density = self._logpdf(x, mu)
          return np.exp(log_density)

      def _logpdf(self, x, mu):
          def _logpdf_positive(x, mu):
              # Log-density formula, applied only where x > 0.
              return (-(1 - mu*x)**2.0 / (2*x*mu**2.0)
                      - 0.5*np.log(2*np.pi*x))

          # Entries with x <= 0 are filled with -inf.
          return xpx.apply_where(
              x > 0, (x, mu),
              _logpdf_positive,
              fill_value=-np.inf)

      def _cdf(self, x, mu):
          inv_mu_minus_x = 1.0/mu - x
          inv_mu_plus_x = 1.0/mu + x
          inv_sqrt_x = 1.0/np.sqrt(x)
          first = _norm_cdf(-inv_sqrt_x*inv_mu_minus_x)
          second = np.exp(2.0/mu)*_norm_cdf(-inv_sqrt_x*inv_mu_plus_x)
          return first - second

      def _sf(self, x, mu):
          # Same building blocks as `_cdf`, with the sign of the first
          # argument flipped and the second term added instead of subtracted.
          inv_mu_minus_x = 1.0/mu - x
          inv_mu_plus_x = 1.0/mu + x
          inv_sqrt_x = 1.0/np.sqrt(x)
          first = _norm_cdf(inv_sqrt_x*inv_mu_minus_x)
          second = np.exp(2.0/mu)*_norm_cdf(-inv_sqrt_x*inv_mu_plus_x)
          return first + second

      def _rvs(self, mu, size=None, random_state=None):
          # Reciprocal of draws from random_state.wald(mu, 1.0).
          wald_draws = random_state.wald(mu, 1.0, size=size)
          return 1.0/wald_draws


  recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
  class semicircular_gen(rv_continuous):
      r"""A semicircular continuous random variable.

      %(before_notes)s

      See Also
      --------
      rdist

      Notes
      -----
      The probability density function for `semicircular` is:

      .. math::

          f(x) = \frac{2}{\pi} \sqrt{1-x^2}

      for :math:`-1 \le x \le 1`.

      The distribution is a special case of `rdist` with ``c = 3``.

      %(after_notes)s

      References
      ----------
      .. [1] "Wigner semicircle distribution",
             https://en.wikipedia.org/wiki/Wigner_semicircle_distribution

      %(example)s

      """
      def _shape_info(self):
          # The distribution has no shape parameters.
          return []

      def _pdf(self, x):
          # f(x) = 2/pi * sqrt(1 - x**2)
          half_chord = np.sqrt(1-x*x)
          return 2.0/np.pi*half_chord

      def _logpdf(self, x):
          # log(2/pi) + log(1 - x**2)/2, with the second term via log1p.
          log_const = np.log(2/np.pi)
          return log_const + 0.5*sc.log1p(-x*x)

      def _cdf(self, x):
          bracket = x*np.sqrt(1-x*x) + np.arcsin(x)
          return 0.5+1.0/np.pi*bracket

      def _ppf(self, q):
          # Delegate to `rdist` with c = 3.
          return rdist._ppf(q, 3)

      def _rvs(self, size=None, random_state=None):
          # generate values uniformly distributed on the area under the pdf
          # (semi-circle) by randomly generating the radius and angle
          radius = np.sqrt(random_state.uniform(size=size))
          cos_angle = np.cos(np.pi * random_state.uniform(size=size))
          return radius * cos_angle

      def _stats(self):
          # Constant 4-tuple of statistics.
          return 0, 0.25, 0, -1.0

      def _entropy(self):
          # Precomputed constant.
          return 0.64472988584940017414


  semicircular = semicircular_gen(a=-1.0, b=1.0, name="semicircular")
  7070. class skewcauchy_gen(rv_continuous):
  7071. r"""A skewed Cauchy random variable.
  7072. %(before_notes)s
  7073. See Also
  7074. --------
  7075. cauchy : Cauchy distribution
  7076. Notes
  7077. -----
  7078. The probability density function for `skewcauchy` is:
  7079. .. math::
  7080. f(x) = \frac{1}{\pi \left(\frac{x^2}{\left(a\, \text{sign}(x) + 1
  7081. \right)^2} + 1 \right)}
  7082. for a real number :math:`x` and skewness parameter :math:`-1 < a < 1`.
  7083. When :math:`a=0`, the distribution reduces to the usual Cauchy
  7084. distribution.
  7085. %(after_notes)s
  7086. References
  7087. ----------
  7088. .. [1] "Skewed generalized *t* distribution", Wikipedia
  7089. https://en.wikipedia.org/wiki/Skewed_generalized_t_distribution#Skewed_Cauchy_distribution
  7090. %(example)s
  7091. """
  7092. def _argcheck(self, a):
  7093. return np.abs(a) < 1
  7094. def _shape_info(self):
  7095. return [_ShapeInfo("a", False, (-1.0, 1.0), (False, False))]
  7096. def _pdf(self, x, a):
  7097. return 1 / (np.pi * (x**2 / (a * np.sign(x) + 1)**2 + 1))
  7098. def _cdf(self, x, a):
  7099. return np.where(x <= 0,
  7100. (1 - a) / 2 + (1 - a) / np.pi * np.arctan(x / (1 - a)),
  7101. (1 - a) / 2 + (1 + a) / np.pi * np.arctan(x / (1 + a)))
  7102. def _ppf(self, x, a):
  7103. i = x < self._cdf(0, a)
  7104. return np.where(i,
  7105. np.tan(np.pi / (1 - a) * (x - (1 - a) / 2)) * (1 - a),
  7106. np.tan(np.pi / (1 + a) * (x - (1 - a) / 2)) * (1 + a))
  7107. def _stats(self, a, moments='mvsk'):
  7108. return np.nan, np.nan, np.nan, np.nan
  7109. def _fitstart(self, data):
  7110. # Use 0 as the initial guess of the skewness shape parameter.
  7111. # For the location and scale, estimate using the median and
  7112. # quartiles.
  7113. if isinstance(data, CensoredData):
  7114. data = data._uncensor()
  7115. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  7116. return 0.0, p50, (p75 - p25)/2
# Public instance of the skewed Cauchy distribution; no support endpoints are
# passed to the constructor here.
skewcauchy = skewcauchy_gen(name='skewcauchy')
class skewnorm_gen(rv_continuous):
    r"""A skew-normal random variable.

    %(before_notes)s

    Notes
    -----
    The pdf is::

        skewnorm.pdf(x, a) = 2 * norm.pdf(x) * norm.cdf(a*x)

    `skewnorm` takes a real number :math:`a` as a skewness parameter.
    When ``a = 0`` the distribution is identical to a normal distribution
    (`norm`). `rvs` implements the method of [1]_.

    This distribution uses routines from the Boost Math C++ library for
    the computation of ``cdf``, ``ppf`` and ``isf`` methods. [2]_

    %(after_notes)s

    References
    ----------
    .. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of
        the multivariate skew-normal distribution. J. Roy. Statist. Soc.,
        B 61, 579-602. :arxiv:`0911.2093`
    .. [2] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.

    %(example)s

    """
    def _argcheck(self, a):
        # Any finite skewness parameter is accepted.
        return np.isfinite(a)

    def _shape_info(self):
        return [_ShapeInfo("a", False, (-np.inf, np.inf), (False, False))]

    def _pdf(self, x, a):
        # Where a == 0 use the plain normal pdf; elsewhere use
        # 2 * norm.pdf(x) * norm.cdf(a*x).
        return xpx.apply_where(
            a == 0, (x, a),
            lambda x, a: _norm_pdf(x),
            lambda x, a: 2.*_norm_pdf(x)*_norm_cdf(a*x))

    def _logpdf(self, x, a):
        # Log-space counterpart of _pdf, with the same a == 0 special case.
        return xpx.apply_where(
            a == 0, (x, a),
            lambda x, a: _norm_logpdf(x),
            lambda x, a: np.log(2)+_norm_logpdf(x)+_norm_logcdf(a*x))

    def _cdf(self, x, a):
        a = np.atleast_1d(a)
        cdf = scu._skewnorm_cdf(x, 0.0, 1.0, a)
        # for some reason, a isn't broadcasted if some of x are invalid
        a = np.broadcast_to(a, cdf.shape)
        # Boost is not accurate in left tail when a > 0
        i_small_cdf = (cdf < 1e-6) & (a > 0)
        # Overwrite those entries in place with the base-class _cdf result.
        # NOTE(review): the indexing assumes ``x`` is an array with the same
        # shape as ``cdf`` -- confirm against callers.
        cdf[i_small_cdf] = super()._cdf(x[i_small_cdf], a[i_small_cdf])
        return np.clip(cdf, 0, 1)

    def _ppf(self, x, a):
        return scu._skewnorm_ppf(x, 0.0, 1.0, a)

    def _sf(self, x, a):
        # Boost's SF is implemented this way. Use whatever customizations
        # we made in the _cdf.
        return self._cdf(-x, -a)

    def _isf(self, x, a):
        return scu._skewnorm_isf(x, 0.0, 1.0, a)

    def _rvs(self, a, size=None, random_state=None):
        # Draw two independent standard normal samples, mix them with
        # d = a/sqrt(1 + a**2) and flip the sign of the mixture wherever the
        # first sample is negative.
        u0 = random_state.normal(size=size)
        v = random_state.normal(size=size)
        d = a/np.sqrt(1 + a**2)
        u1 = d*u0 + v*np.sqrt(1 - d**2)
        return np.where(u0 >= 0, u1, -u1)

    def _stats(self, a, moments='mvsk'):
        # Only the entries requested through ``moments`` are filled in; the
        # others stay None.  The order is mean, variance, skewness, kurtosis.
        output = [None, None, None, None]
        # ``const`` is sqrt(2/pi) * a/sqrt(1 + a**2); it is used as the mean
        # and every other statistic is expressed through it.
        const = np.sqrt(2/np.pi) * a/np.sqrt(1 + a**2)

        if 'm' in moments:
            output[0] = const
        if 'v' in moments:
            output[1] = 1 - const**2
        if 's' in moments:
            output[2] = ((4 - np.pi)/2) * (const/np.sqrt(1 - const**2))**3
        if 'k' in moments:
            output[3] = (2*(np.pi - 3)) * (const**4/(1 - const**2)**2)

        return output

    # For odd order, each noncentral moment of the skew-normal distribution
    # with location 0 and scale 1 can be expressed as a polynomial in delta,
    # where delta = a/sqrt(1 + a**2) and `a` is the skew-normal shape
    # parameter.  The dictionary _skewnorm_odd_moments defines those
    # polynomials for orders up to 19.  The dict is implemented as a cached
    # property to reduce the impact of the creation of the dict on import time.
    @cached_property
    def _skewnorm_odd_moments(self):
        # Keys are the odd moment orders.  In _munp each polynomial is
        # evaluated at delta**2 and the result multiplied by delta.
        skewnorm_odd_moments = {
            1: Polynomial([1]),
            3: Polynomial([3, -1]),
            5: Polynomial([15, -10, 3]),
            7: Polynomial([105, -105, 63, -15]),
            9: Polynomial([945, -1260, 1134, -540, 105]),
            11: Polynomial([10395, -17325, 20790, -14850, 5775, -945]),
            13: Polynomial([135135, -270270, 405405, -386100, 225225, -73710,
                            10395]),
            15: Polynomial([2027025, -4729725, 8513505, -10135125, 7882875,
                            -3869775, 1091475, -135135]),
            17: Polynomial([34459425, -91891800, 192972780, -275675400,
                            268017750, -175429800, 74220300, -18378360,
                            2027025]),
            19: Polynomial([654729075, -1964187225, 4714049340, -7856748900,
                            9166207050, -7499623950, 4230557100, -1571349780,
                            346621275, -34459425]),
        }
        return skewnorm_odd_moments

    def _munp(self, order, a):
        # Noncentral moment of the given order.  Raises NotImplementedError
        # for odd orders greater than 19.
        if order % 2:
            if order > 19:
                raise NotImplementedError("skewnorm noncentral moments not "
                                          "implemented for odd orders greater "
                                          "than 19.")
            # Use the precomputed polynomials that were derived from the
            # moment generating function.
            delta = a/np.sqrt(1 + a**2)
            return (delta * self._skewnorm_odd_moments[order](delta**2)
                    * _SQRT_2_OVER_PI)
        else:
            # For even order, the moment is just (order-1)!!, where !! is the
            # notation for the double factorial; for an odd integer m, m!! is
            # m*(m-2)*...*3*1.
            # We could use special.factorial2, but we know the argument is odd,
            # so avoid the overhead of that function and compute the result
            # directly here.
            return sc.gamma((order + 1)/2) * 2**(order/2) / _SQRT_PI

    @extend_notes_in_docstring(rv_continuous, notes="""\
        If ``method='mm'``, parameters fixed by the user are respected, and the
        remaining parameters are used to match distribution and sample moments
        where possible. For example, if the user fixes the location with
        ``floc``, the parameters will only match the distribution skewness and
        variance to the sample skewness and variance; no attempt will be made
        to match the means or minimize a norm of the errors.
        Note that the maximum possible skewness magnitude of a
        `scipy.stats.skewnorm` distribution is approximately 0.9952717; if the
        magnitude of the data's sample skewness exceeds this, the returned
        shape parameter ``a`` will be infinite.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # ``superfit=True`` goes straight to the generic fit.
        if kwds.pop("superfit", False):
            return super().fit(data, *args, **kwds)
        # CensoredData with no censored values is unwrapped and handled
        # below; data that really is censored goes to the generic fit.
        if isinstance(data, CensoredData):
            if data.num_censored() == 0:
                data = data._uncensor()
            else:
                return super().fit(data, *args, **kwds)

        # this extracts fixed shape, location, and scale however they
        # are specified, and also leaves them in `kwds`
        data, fa, floc, fscale = _check_fit_input_parameters(self, data,
                                                             args, kwds)
        method = kwds.get("method", "mle").lower()

        # See https://en.wikipedia.org/wiki/Skew_normal_distribution for
        # moment formulas.
        def skew_d(d):  # skewness in terms of delta
            return (4-np.pi)/2 * ((d * np.sqrt(2 / np.pi))**3
                                  / (1 - 2*d**2 / np.pi)**(3/2))
        def d_skew(skew):  # delta in terms of skewness
            s_23 = np.abs(skew)**(2/3)
            return np.sign(skew) * np.sqrt(
                np.pi/2 * s_23 / (s_23 + ((4 - np.pi)/2)**(2/3))
            )

        # If method is method of moments, we don't need the user's guesses.
        # Otherwise, extract the guesses from args and kwds.
        if method == "mm":
            a, loc, scale = None, None, None
        else:
            a = args[0] if len(args) else None
            loc = kwds.pop('loc', None)
            scale = kwds.pop('scale', None)

        if fa is None and a is None:  # not fixed and no guess: use MoM
            # Solve for a that matches sample distribution skewness to sample
            # skewness.
            s = stats.skew(data)
            if method == 'mle':
                # For MLE initial conditions, clip skewness to a large but
                # reasonable value in case the data skewness is out-of-range.
                s = np.clip(s, -0.99, 0.99)
            else:
                # For method of moments, clip to the skewness at delta = 1.
                s_max = skew_d(1)
                s = np.clip(s, -s_max, s_max)
            d = d_skew(s)
            # d**2 == 1 makes the denominator zero; the divide warning is
            # silenced (the fit notes say ``a`` is infinite in that case).
            with np.errstate(divide='ignore'):
                a = np.sqrt(np.divide(d**2, (1-d**2)))*np.sign(s)
        else:
            # A fixed value takes precedence over a user-supplied guess.
            a = fa if fa is not None else a
            d = a / np.sqrt(1 + a**2)

        if fscale is None and scale is None:
            # Pick the scale from the sample variance, given delta.
            v = np.var(data)
            scale = np.sqrt(v / (1 - 2*d**2/np.pi))
        elif fscale is not None:
            scale = fscale

        if floc is None and loc is None:
            # Pick the location from the sample mean, given scale and delta.
            m = np.mean(data)
            loc = m - scale*d*np.sqrt(2/np.pi)
        elif floc is not None:
            loc = floc

        if method == 'mm':
            return a, loc, scale
        else:
            # At this point, parameter "guesses" may equal the fixed parameters
            # in kwds. No harm in passing them as guesses, too.
            return super().fit(data, a, loc=loc, scale=scale, **kwds)
# Public instance of the skew-normal distribution; no support endpoints are
# passed to the constructor here.
skewnorm = skewnorm_gen(name='skewnorm')
  7311. class trapezoid_gen(rv_continuous):
  7312. r"""A trapezoidal continuous random variable.
  7313. %(before_notes)s
  7314. Notes
  7315. -----
  7316. The trapezoidal distribution can be represented with an up-sloping line
  7317. from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)``
  7318. and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``. This
  7319. defines the trapezoid base from ``loc`` to ``(loc+scale)`` and the flat
  7320. top from ``c`` to ``d`` proportional to the position along the base
  7321. with ``0 <= c <= d <= 1``. When ``c=d``, this is equivalent to `triang`
  7322. with the same values for `loc`, `scale` and `c`.
  7323. The method of [1]_ is used for computing moments.
  7324. `trapezoid` takes :math:`c` and :math:`d` as shape parameters.
  7325. %(after_notes)s
  7326. The standard form is in the range [0, 1] with c the mode.
  7327. The location parameter shifts the start to `loc`.
  7328. The scale parameter changes the width from 1 to `scale`.
  7329. %(example)s
  7330. References
  7331. ----------
  7332. .. [1] Kacker, R.N. and Lawrence, J.F. (2007). Trapezoidal and triangular
  7333. distributions for Type B evaluation of standard uncertainty.
  7334. Metrologia 44, 117-127. :doi:`10.1088/0026-1394/44/2/003`
  7335. """
  7336. def _argcheck(self, c, d):
  7337. return (c >= 0) & (c <= 1) & (d >= 0) & (d <= 1) & (d >= c)
  7338. def _shape_info(self):
  7339. ic = _ShapeInfo("c", False, (0, 1.0), (True, True))
  7340. id = _ShapeInfo("d", False, (0, 1.0), (True, True))
  7341. return [ic, id]
  7342. def _pdf(self, x, c, d):
  7343. u = 2 / (d-c+1)
  7344. return _lazyselect([x < c,
  7345. (c <= x) & (x <= d),
  7346. x > d],
  7347. [lambda x, c, d, u: u * x / c,
  7348. lambda x, c, d, u: u,
  7349. lambda x, c, d, u: u * (1-x) / (1-d)],
  7350. (x, c, d, u))
  7351. def _cdf(self, x, c, d):
  7352. return _lazyselect([x < c,
  7353. (c <= x) & (x <= d),
  7354. x > d],
  7355. [lambda x, c, d: x**2 / c / (d-c+1),
  7356. lambda x, c, d: (c + 2 * (x-c)) / (d-c+1),
  7357. lambda x, c, d: 1-((1-x) ** 2
  7358. / (d-c+1) / (1-d))],
  7359. (x, c, d))
  7360. def _ppf(self, q, c, d):
  7361. qc, qd = self._cdf(c, c, d), self._cdf(d, c, d)
  7362. condlist = [q < qc, q <= qd, q > qd]
  7363. choicelist = [np.sqrt(q * c * (1 + d - c)),
  7364. 0.5 * q * (1 + d - c) + 0.5 * c,
  7365. 1 - np.sqrt((1 - q) * (d - c + 1) * (1 - d))]
  7366. return np.select(condlist, choicelist)
  7367. def _munp(self, n, c, d):
  7368. # Using the parameterization from Kacker, 2007, with
  7369. # a=bottom left, c=top left, d=top right, b=bottom right, then
  7370. # E[X^n] = h/(n+1)/(n+2) [(b^{n+2}-d^{n+2})/(b-d)
  7371. # - ((c^{n+2} - a^{n+2})/(c-a)]
  7372. # with h = 2/((b-a) - (d-c)). The corresponding parameterization
  7373. # in scipy, has a'=loc, c'=loc+c*scale, d'=loc+d*scale, b'=loc+scale,
  7374. # which for standard form reduces to a'=0, b'=1, c'=c, d'=d.
  7375. # Substituting into E[X^n] gives the bd' term as (1 - d^{n+2})/(1 - d)
  7376. # and the ac' term as c^{n-1} for the standard form. The bd' term has
  7377. # numerical difficulties near d=1, so replace (1 - d^{n+2})/(1-d)
  7378. # with expm1((n+2)*log(d))/(d-1).
  7379. # Testing with n=18 for c=(1e-30,1-eps) shows that this is stable.
  7380. # We still require an explicit test for d=1 to prevent divide by zero,
  7381. # and now a test for d=0 to prevent log(0).
  7382. ab_term = c**(n+1)
  7383. dc_term = _lazyselect(
  7384. [d == 0.0, (0.0 < d) & (d < 1.0), d == 1.0],
  7385. [lambda d: 1.0,
  7386. lambda d: np.expm1((n+2) * np.log(d)) / (d-1.0),
  7387. lambda d: n+2],
  7388. [d])
  7389. val = 2.0 / (1.0+d-c) * (dc_term - ab_term) / ((n+1) * (n+2))
  7390. return val
  7391. def _entropy(self, c, d):
  7392. # Using the parameterization from Wikipedia (van Dorp, 2003)
  7393. # with a=bottom left, c=top left, d=top right, b=bottom right
  7394. # gives a'=loc, b'=loc+c*scale, c'=loc+d*scale, d'=loc+scale,
  7395. # which for loc=0, scale=1 is a'=0, b'=c, c'=d, d'=1.
  7396. # Substituting into the entropy formula from Wikipedia gives
  7397. # the following result.
  7398. return 0.5 * (1.0-d+c) / (1.0+d-c) + np.log(0.5 * (1.0+d-c))
  7399. def _fitstart(self, data, args=None):
  7400. # Arbitrary, but c=d=1 fails due to being on edge of bounds
  7401. if args is None:
  7402. args = (0.33, 0.66)
  7403. return super()._fitstart(data, args=args)
# Public instance of the trapezoidal distribution.  ``a`` and ``b`` are
# passed as 0.0 and 1.0, matching the standard-form range [0, 1] given in the
# class docstring.
trapezoid = trapezoid_gen(a=0.0, b=1.0, name="trapezoid")
  7405. class triang_gen(rv_continuous):
  7406. r"""A triangular continuous random variable.
  7407. %(before_notes)s
  7408. Notes
  7409. -----
  7410. The triangular distribution can be represented with an up-sloping line from
  7411. ``loc`` to ``(loc + c*scale)`` and then downsloping for ``(loc + c*scale)``
  7412. to ``(loc + scale)``.
  7413. `triang` takes ``c`` as a shape parameter for :math:`0 \le c \le 1`.
  7414. %(after_notes)s
  7415. The standard form is in the range [0, 1] with c the mode.
  7416. The location parameter shifts the start to `loc`.
  7417. The scale parameter changes the width from 1 to `scale`.
  7418. %(example)s
  7419. """
  7420. def _rvs(self, c, size=None, random_state=None):
  7421. return random_state.triangular(0, c, 1, size)
  7422. def _argcheck(self, c):
  7423. return (c >= 0) & (c <= 1)
  7424. def _shape_info(self):
  7425. return [_ShapeInfo("c", False, (0, 1.0), (True, True))]
  7426. def _pdf(self, x, c):
  7427. # 0: edge case where c=0
  7428. # 1: generalised case for x < c, don't use x <= c, as it doesn't cope
  7429. # with c = 0.
  7430. # 2: generalised case for x >= c, but doesn't cope with c = 1
  7431. # 3: edge case where c=1
  7432. r = _lazyselect([c == 0,
  7433. x < c,
  7434. (x >= c) & (c != 1),
  7435. c == 1],
  7436. [lambda x, c: 2 - 2 * x,
  7437. lambda x, c: 2 * x / c,
  7438. lambda x, c: 2 * (1 - x) / (1 - c),
  7439. lambda x, c: 2 * x],
  7440. (x, c))
  7441. return r
  7442. def _cdf(self, x, c):
  7443. r = _lazyselect([c == 0,
  7444. x < c,
  7445. (x >= c) & (c != 1),
  7446. c == 1],
  7447. [lambda x, c: 2*x - x*x,
  7448. lambda x, c: x * x / c,
  7449. lambda x, c: (x*x - 2*x + c) / (c-1),
  7450. lambda x, c: x * x],
  7451. (x, c))
  7452. return r
  7453. def _ppf(self, q, c):
  7454. return np.where(q < c, np.sqrt(c * q), 1-np.sqrt((1-c) * (1-q)))
  7455. def _stats(self, c):
  7456. return ((c+1.0)/3.0,
  7457. (1.0-c+c*c)/18,
  7458. np.sqrt(2)*(2*c-1)*(c+1)*(c-2) / (5*np.power((1.0-c+c*c), 1.5)),
  7459. -3.0/5.0)
  7460. def _entropy(self, c):
  7461. return 0.5-np.log(2)
# Public instance of the triangular distribution.  ``a`` and ``b`` are passed
# as 0.0 and 1.0, matching the standard-form range [0, 1] given in the class
# docstring.
triang = triang_gen(a=0.0, b=1.0, name="triang")
  7463. class truncexpon_gen(rv_continuous):
  7464. r"""A truncated exponential continuous random variable.
  7465. %(before_notes)s
  7466. Notes
  7467. -----
  7468. The probability density function for `truncexpon` is:
  7469. .. math::
  7470. f(x, b) = \frac{\exp(-x)}{1 - \exp(-b)}
  7471. for :math:`0 <= x <= b`.
  7472. `truncexpon` takes ``b`` as a shape parameter for :math:`b`.
  7473. %(after_notes)s
  7474. %(example)s
  7475. """
  7476. def _shape_info(self):
  7477. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  7478. def _get_support(self, b):
  7479. return self.a, b
  7480. def _pdf(self, x, b):
  7481. # truncexpon.pdf(x, b) = exp(-x) / (1-exp(-b))
  7482. return np.exp(-x)/(-sc.expm1(-b))
  7483. def _logpdf(self, x, b):
  7484. return -x - np.log(-sc.expm1(-b))
  7485. def _cdf(self, x, b):
  7486. return sc.expm1(-x)/sc.expm1(-b)
  7487. def _ppf(self, q, b):
  7488. return -sc.log1p(q*sc.expm1(-b))
  7489. def _sf(self, x, b):
  7490. return (np.exp(-b) - np.exp(-x))/sc.expm1(-b)
  7491. def _isf(self, q, b):
  7492. return -np.log(np.exp(-b) - q * sc.expm1(-b))
  7493. def _munp(self, n, b):
  7494. # wrong answer with formula, same as in continuous.pdf
  7495. # return sc.gamman+1)-sc.gammainc1+n, b)
  7496. if n == 1:
  7497. return (1-(b+1)*np.exp(-b))/(-sc.expm1(-b))
  7498. elif n == 2:
  7499. return 2*(1-0.5*(b*b+2*b+2)*np.exp(-b))/(-sc.expm1(-b))
  7500. else:
  7501. # return generic for higher moments
  7502. return super()._munp(n, b)
  7503. def _entropy(self, b):
  7504. eB = np.exp(b)
  7505. return np.log(eB-1)+(1+eB*(b-1.0))/(1.0-eB)
# Public instance of the truncated exponential distribution.  Only the lower
# endpoint ``a=0.0`` is passed here; ``truncexpon_gen._get_support`` returns
# the shape parameter ``b`` as the upper endpoint.
truncexpon = truncexpon_gen(a=0.0, name='truncexpon')
# NOTE(review): presumably records the support as (lower bound, name of the
# shape parameter that gives the upper bound) -- confirm against the code
# that reads ``_support``.
truncexpon._support = (0.0, 'b')
  7508. # logsumexp trick for log(p + q) with only log(p) and log(q)
  7509. def _log_sum(log_p, log_q):
  7510. return sc.logsumexp([log_p, log_q], axis=0)
  7511. # same as above, but using -exp(x) = exp(x + πi)
  7512. def _log_diff(log_p, log_q):
  7513. return sc.logsumexp([log_p, log_q+np.pi*1j], axis=0)
  7514. def _log_gauss_mass(a, b):
  7515. """Log of Gaussian probability mass within an interval"""
  7516. a, b = np.broadcast_arrays(a, b)
  7517. # Calculations in right tail are inaccurate, so we'll exploit the
  7518. # symmetry and work only in the left tail
  7519. case_left = b <= 0
  7520. case_right = a > 0
  7521. case_central = ~(case_left | case_right)
  7522. def mass_case_left(a, b):
  7523. return _log_diff(_norm_logcdf(b), _norm_logcdf(a))
  7524. def mass_case_right(a, b):
  7525. return mass_case_left(-b, -a)
  7526. def mass_case_central(a, b):
  7527. # Previously, this was implemented as:
  7528. # left_mass = mass_case_left(a, 0)
  7529. # right_mass = mass_case_right(0, b)
  7530. # return _log_sum(left_mass, right_mass)
  7531. # Catastrophic cancellation occurs as np.exp(log_mass) approaches 1.
  7532. # Correct for this with an alternative formulation.
  7533. # We're not concerned with underflow here: if only one term
  7534. # underflows, it was insignificant; if both terms underflow,
  7535. # the result can't accurately be represented in logspace anyway
  7536. # because sc.log1p(x) ~ x for small x.
  7537. return sc.log1p(-_norm_cdf(a) - _norm_cdf(-b))
  7538. # _lazyselect not working; don't care to debug it
  7539. out = np.full_like(a, fill_value=np.nan, dtype=np.complex128)
  7540. if a[case_left].size:
  7541. out[case_left] = mass_case_left(a[case_left], b[case_left])
  7542. if a[case_right].size:
  7543. out[case_right] = mass_case_right(a[case_right], b[case_right])
  7544. if a[case_central].size:
  7545. out[case_central] = mass_case_central(a[case_central], b[case_central])
  7546. return np.real(out) # discard ~0j
  7547. class truncnorm_gen(rv_continuous):
  7548. r"""A truncated normal continuous random variable.
  7549. %(before_notes)s
  7550. Notes
  7551. -----
  7552. This distribution is the normal distribution centered on ``loc`` (default
  7553. 0), with standard deviation ``scale`` (default 1), and truncated at ``a``
  7554. and ``b`` *standard deviations* from ``loc``. For arbitrary ``loc`` and
  7555. ``scale``, ``a`` and ``b`` are *not* the abscissae at which the shifted
  7556. and scaled distribution is truncated.
  7557. .. note::
  7558. If ``a_trunc`` and ``b_trunc`` are the abscissae at which we wish
  7559. to truncate the distribution (as opposed to the number of standard
  7560. deviations from ``loc``), then we can calculate the distribution
  7561. parameters ``a`` and ``b`` as follows::
  7562. a, b = (a_trunc - loc) / scale, (b_trunc - loc) / scale
  7563. This is a common point of confusion. For additional clarification,
  7564. please see the example below.
  7565. %(example)s
  7566. In the examples above, ``loc=0`` and ``scale=1``, so the plot is truncated
  7567. at ``a`` on the left and ``b`` on the right. However, suppose we were to
  7568. produce the same histogram with ``loc = 1`` and ``scale=0.5``.
  7569. >>> loc, scale = 1, 0.5
  7570. >>> rv = truncnorm(a, b, loc=loc, scale=scale)
  7571. >>> x = np.linspace(truncnorm.ppf(0.01, a, b),
  7572. ... truncnorm.ppf(0.99, a, b), 100)
  7573. >>> r = rv.rvs(size=1000)
  7574. >>> fig, ax = plt.subplots(1, 1)
  7575. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  7576. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  7577. >>> ax.set_xlim(a, b)
  7578. >>> ax.legend(loc='best', frameon=False)
  7579. >>> plt.show()
  7580. Note that the distribution is no longer appears to be truncated at
  7581. abscissae ``a`` and ``b``. That is because the *standard* normal
  7582. distribution is first truncated at ``a`` and ``b``, *then* the resulting
  7583. distribution is scaled by ``scale`` and shifted by ``loc``. If we instead
  7584. want the shifted and scaled distribution to be truncated at ``a`` and
  7585. ``b``, we need to transform these values before passing them as the
  7586. distribution parameters.
  7587. >>> a_transformed, b_transformed = (a - loc) / scale, (b - loc) / scale
  7588. >>> rv = truncnorm(a_transformed, b_transformed, loc=loc, scale=scale)
  7589. >>> x = np.linspace(truncnorm.ppf(0.01, a, b),
  7590. ... truncnorm.ppf(0.99, a, b), 100)
  7591. >>> r = rv.rvs(size=10000)
  7592. >>> fig, ax = plt.subplots(1, 1)
  7593. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  7594. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  7595. >>> ax.set_xlim(a-0.1, b+0.1)
  7596. >>> ax.legend(loc='best', frameon=False)
  7597. >>> plt.show()
  7598. """
  7599. def _argcheck(self, a, b):
  7600. return a < b
  7601. def _shape_info(self):
  7602. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (True, False))
  7603. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, True))
  7604. return [ia, ib]
  7605. def _fitstart(self, data):
  7606. # Reasonable, since support is [a, b]
  7607. if isinstance(data, CensoredData):
  7608. data = data._uncensor()
  7609. return super()._fitstart(data, args=(np.min(data), np.max(data)))
  7610. def _get_support(self, a, b):
  7611. return a, b
  7612. def _pdf(self, x, a, b):
  7613. return np.exp(self._logpdf(x, a, b))
  7614. def _logpdf(self, x, a, b):
  7615. return _norm_logpdf(x) - _log_gauss_mass(a, b)
  7616. def _cdf(self, x, a, b):
  7617. return np.exp(self._logcdf(x, a, b))
  7618. def _logcdf(self, x, a, b):
  7619. x, a, b = np.broadcast_arrays(x, a, b)
  7620. logcdf = np.asarray(_log_gauss_mass(a, x) - _log_gauss_mass(a, b))
  7621. i = logcdf > -0.1 # avoid catastrophic cancellation
  7622. if np.any(i):
  7623. logcdf[i] = np.log1p(-np.exp(self._logsf(x[i], a[i], b[i])))
  7624. return logcdf
  7625. def _sf(self, x, a, b):
  7626. return np.exp(self._logsf(x, a, b))
  7627. def _logsf(self, x, a, b):
  7628. x, a, b = np.broadcast_arrays(x, a, b)
  7629. logsf = np.asarray(_log_gauss_mass(x, b) - _log_gauss_mass(a, b))
  7630. i = logsf > -0.1 # avoid catastrophic cancellation
  7631. if np.any(i):
  7632. logsf[i] = np.log1p(-np.exp(self._logcdf(x[i], a[i], b[i])))
  7633. return logsf
  7634. def _entropy(self, a, b):
  7635. A = _norm_cdf(a)
  7636. B = _norm_cdf(b)
  7637. Z = B - A
  7638. C = np.log(np.sqrt(2 * np.pi * np.e) * Z)
  7639. D = (a * _norm_pdf(a) - b * _norm_pdf(b)) / (2 * Z)
  7640. h = C + D
  7641. return h
  7642. def _ppf(self, q, a, b):
  7643. q, a, b = np.broadcast_arrays(q, a, b)
  7644. case_left = a < 0
  7645. case_right = ~case_left
  7646. def ppf_left(q, a, b):
  7647. log_Phi_x = _log_sum(_norm_logcdf(a),
  7648. np.log(q) + _log_gauss_mass(a, b))
  7649. return sc.ndtri_exp(log_Phi_x)
  7650. def ppf_right(q, a, b):
  7651. log_Phi_x = _log_sum(_norm_logcdf(-b),
  7652. np.log1p(-q) + _log_gauss_mass(a, b))
  7653. return -sc.ndtri_exp(log_Phi_x)
  7654. out = np.empty_like(q)
  7655. q_left = q[case_left]
  7656. q_right = q[case_right]
  7657. if q_left.size:
  7658. out[case_left] = ppf_left(q_left, a[case_left], b[case_left])
  7659. if q_right.size:
  7660. out[case_right] = ppf_right(q_right, a[case_right], b[case_right])
  7661. return out
  7662. def _isf(self, q, a, b):
  7663. # Mostly copy-paste of _ppf, but I think this is simpler than combining
  7664. q, a, b = np.broadcast_arrays(q, a, b)
  7665. case_left = b < 0
  7666. case_right = ~case_left
  7667. def isf_left(q, a, b):
  7668. log_Phi_x = _log_diff(_norm_logcdf(b),
  7669. np.log(q) + _log_gauss_mass(a, b))
  7670. return sc.ndtri_exp(np.real(log_Phi_x))
  7671. def isf_right(q, a, b):
  7672. log_Phi_x = _log_diff(_norm_logcdf(-a),
  7673. np.log1p(-q) + _log_gauss_mass(a, b))
  7674. return -sc.ndtri_exp(np.real(log_Phi_x))
  7675. out = np.empty_like(q)
  7676. q_left = q[case_left]
  7677. q_right = q[case_right]
  7678. if q_left.size:
  7679. out[case_left] = isf_left(q_left, a[case_left], b[case_left])
  7680. if q_right.size:
  7681. out[case_right] = isf_right(q_right, a[case_right], b[case_right])
  7682. return out
  7683. def _munp(self, n, a, b):
  7684. def n_th_moment(n, a, b):
  7685. """
  7686. Returns n-th moment. Defined only if n >= 0.
  7687. Function cannot broadcast due to the loop over n
  7688. """
  7689. ab = np.asarray([a, b])
  7690. pA, pB = self._pdf(ab, a, b)
  7691. probs = np.asarray([pA, -pB])
  7692. cond = probs != 0
  7693. moments = [0, 1]
  7694. for k in range(1, n+1):
  7695. # a or b might be infinite, and the corresponding pdf value
  7696. # is 0 in that case, but nan is returned for the
  7697. # multiplication. However, as b->infinity, pdf(b)*b**k -> 0.
  7698. # So it is safe to use xpx.apply_where to avoid the nan.
  7699. vals = xpx.apply_where(cond, (probs, ab),
  7700. lambda x, y: x * y**(k-1),
  7701. fill_value=0)
  7702. mk = np.sum(vals) + (k-1) * moments[-2]
  7703. moments.append(mk)
  7704. return moments[-1]
  7705. return xpx.apply_where((n >= 0) & (a == a) & (b == b), (n, a, b),
  7706. np.vectorize(n_th_moment, otypes=[np.float64]),
  7707. fill_value=np.nan)
  7708. def _stats(self, a, b, moments='mv'):
  7709. pA, pB = self.pdf(np.array([a, b]), a, b)
  7710. def _truncnorm_stats_scalar(a, b, pA, pB):
  7711. ab = np.asarray([a, b])
  7712. m1 = pA - pB
  7713. mu = m1
  7714. # use xpx.apply_where to avoid nan (See detailed comment in _munp)
  7715. probs = np.asarray([pA, -pB])
  7716. cond = probs != 0
  7717. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y,
  7718. fill_value=0)
  7719. m2 = 1 + np.sum(vals)
  7720. vals = xpx.apply_where(cond, (probs, ab - mu), lambda x, y: x*y,
  7721. fill_value=0)
  7722. # mu2 = m2 - mu**2, but not as numerically stable as:
  7723. # mu2 = (a-mu)*pA - (b-mu)*pB + 1
  7724. mu2 = 1 + np.sum(vals)
  7725. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y**2,
  7726. fill_value=0)
  7727. m3 = 2*m1 + np.sum(vals)
  7728. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y**3,
  7729. fill_value=0)
  7730. m4 = 3*m2 + np.sum(vals)
  7731. mu3 = m3 + m1 * (-3*m2 + 2*m1**2)
  7732. g1 = mu3 / np.power(mu2, 1.5)
  7733. mu4 = m4 + m1*(-4*m3 + 3*m1*(2*m2 - m1**2))
  7734. g2 = mu4 / mu2**2 - 3
  7735. return mu, mu2, g1, g2
  7736. _truncnorm_stats = np.vectorize(_truncnorm_stats_scalar)
  7737. return _truncnorm_stats(a, b, pA, pB)
# Public instance of the truncated normal distribution.  No support endpoints
# are passed; ``truncnorm_gen._get_support`` returns the shape parameters
# ``a`` and ``b`` as the endpoints.
# NOTE(review): the meaning of ``momtype=1`` is not visible here -- confirm
# against the ``rv_continuous`` constructor.
truncnorm = truncnorm_gen(name='truncnorm', momtype=1)
# NOTE(review): presumably records the support as the names of the shape
# parameters giving the lower and upper bounds -- confirm against the code
# that reads ``_support``.
truncnorm._support = ('a', 'b')
  7740. class truncpareto_gen(rv_continuous):
  7741. r"""An upper truncated Pareto continuous random variable.
  7742. %(before_notes)s
  7743. See Also
  7744. --------
  7745. pareto : Pareto distribution
  7746. Notes
  7747. -----
  7748. The probability density function for `truncpareto` is:
  7749. .. math::
  7750. f(x, b, c) = \frac{b}{1 - c^{-b}} \frac{1}{x^{b+1}}
  7751. for :math:`b \neq 0`, :math:`c > 1` and :math:`1 \le x \le c`.
  7752. `truncpareto` takes `b` and `c` as shape parameters for :math:`b` and
  7753. :math:`c`.
  7754. Notice that the upper truncation value :math:`c` is defined in
  7755. standardized form so that random values of an unscaled, unshifted variable
  7756. are within the range ``[1, c]``.
  7757. If ``u_r`` is the upper bound to a scaled and/or shifted variable,
  7758. then ``c = (u_r - loc) / scale``. In other words, the support of the
  7759. distribution becomes ``(scale + loc) <= x <= (c*scale + loc)`` when
  7760. `scale` and/or `loc` are provided.
  7761. The ``fit`` method assumes that :math:`b` is positive; it does not produce
  7762. good results when the data is more consistent with negative :math:`b`.
  7763. `truncpareto` can also be used to model a general power law distribution
  7764. with PDF:
  7765. .. math::
  7766. f(x; a, l, h) = \frac{a}{h^a - l^a} x^{a-1}
  7767. for :math:`a \neq 0` and :math:`0 < l < x < h`. Suppose :math:`a`,
  7768. :math:`l`, and :math:`h` are represented in code as ``a``, ``l``, and
  7769. ``h``, respectively. In this case, use `truncpareto` with parameters
  7770. ``b = -a``, ``c = h / l``, ``scale = l``, and ``loc = 0``.
  7771. %(after_notes)s
  7772. References
  7773. ----------
  7774. .. [1] Burroughs, S. M., and Tebbens S. F.
  7775. "Upper-truncated power laws in natural systems."
  7776. Pure and Applied Geophysics 158.4 (2001): 741-757.
  7777. %(example)s
  7778. """
  7779. def _shape_info(self):
  7780. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  7781. ic = _ShapeInfo("c", False, (1.0, np.inf), (False, False))
  7782. return [ib, ic]
  7783. def _argcheck(self, b, c):
  7784. return (b != 0.) & (c > 1.)
  7785. def _get_support(self, b, c):
  7786. return self.a, c
  7787. def _pdf(self, x, b, c):
  7788. # here and below, avoid int to negative int power
  7789. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7790. return b * x**-(b+1) / (1 - 1/c**b)
  7791. def _logpdf(self, x, b, c):
  7792. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7793. return xpx.apply_where(b > 0, (x, b, c), self._logpdf_pos_b, super()._logpdf)
  7794. def _logpdf_pos_b(self, x, b, c):
  7795. return np.log(b) - np.log(-np.expm1(-b*np.log(c))) - (b+1)*np.log(x)
  7796. def _cdf(self, x, b, c):
  7797. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7798. return (1 - x**-b) / (1 - 1/c**b)
  def _logcdf(self, x, b, c):
      """Log-CDF: dedicated formula where ``b > 0``, parent's otherwise."""
      x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
      b_is_positive = b > 0
      return xpx.apply_where(b_is_positive, (x, b, c),
                             self._logcdf_pos_b, super()._logcdf)
  7802. def _logcdf_pos_b(self, x, b, c):
  7803. return np.log1p(-x**-b) - np.log1p(-1/c**b)
  def _ppf(self, q, b, c):
      """Evaluate ``(1 - (1 - c**-b)*q) ** (-1/b)``."""
      q, b, c = xp_promote(q, b, c, force_floating=True, xp=np)
      base = 1 - (1 - 1/c**b)*q
      exponent = -1/b
      return pow(base, exponent)
  def _sf(self, x, b, c):
      """Evaluate ``(x**-b - c**-b) / (1 - c**-b)``."""
      x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
      inv_cb = 1/c**b
      return (x**-b - inv_cb) / (1 - inv_cb)
  def _logsf(self, x, b, c):
      """Log-SF: dedicated formula where ``b > 0``, parent's otherwise."""
      x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
      b_is_positive = b > 0
      return xpx.apply_where(b_is_positive, (x, b, c),
                             self._logsf_pos_b, super()._logsf)
  7813. def _logsf_pos_b(self, x, b, c):
  7814. return np.log(x**-b - 1/c**b) - np.log1p(-1/c**b)
  def _isf(self, q, b, c):
      """Evaluate ``(c**-b + (1 - c**-b)*q) ** (-1/b)``."""
      q, b, c = xp_promote(q, b, c, force_floating=True, xp=np)
      inv_cb = 1/c**b
      base = inv_cb + (1 - inv_cb)*q
      return pow(base, -1/b)
  7818. def _entropy(self, b, c):
  7819. return -(np.log(b/(1 - 1/c**b))
  7820. + (b+1)*(np.log(c)/(c**b - 1) - 1/b))
  def _munp(self, n, b, c):
      """Return the ``n``-th raw moment for shapes ``b`` and ``c``.

      The general expression ``b/(b-n) * (c**b - c**n) / (c**b - 1)`` is
      0/0 where ``n == b``; there the limiting value
      ``b*log(c) / (1 - c**-b)`` is used instead. The choice is made
      elementwise, so broadcast inputs in which only some entries satisfy
      ``n == b`` no longer produce NaN for those entries.
      """
      n, b, c = xp_promote(n, b, c, force_floating=True, xp=np)
      at_limit = (n == b)
      limit_value = b*np.log(c) / (1 - 1/c**b)
      # Use a harmless denominator where ``n == b`` so that the general
      # formula is never evaluated as 0/0; those entries are discarded below.
      safe_denom = np.where(at_limit, 1.0, b - n)
      general_value = b / safe_denom * (c**b - c**n) / (c**b - 1)
      # ``[()]`` turns a 0-d result back into a scalar.
      return np.where(at_limit, limit_value, general_value)[()]
  def _fitstart(self, data):
      """Build a starting guess ``(b, c, loc, scale)`` from a `pareto` fit."""
      sample = data._uncensor() if isinstance(data, CensoredData) else data
      b, loc, scale = pareto.fit(sample)
      # Standardized position of the largest observation.
      largest = max(sample)
      return b, (largest - loc)/scale, loc, scale
  @_call_super_mom
  @inherit_docstring_from(rv_continuous)
  def fit(self, data, *args, **kwds):
      # No docstring on purpose: the `inherit_docstring_from` decorator
      # presumably supplies the one from `rv_continuous.fit`.
      #
      # Outline:
      #   * ``superfit=True`` goes straight to the generic fit.
      #   * If none of c/loc/scale is fixed, loc is found by root finding
      #     and the other parameters follow from it.
      #   * Otherwise loc, scale and c follow from the fixed values and the
      #     data extremes; b (if free) is found by root finding.
      #   * Whenever a step fails, the generic fit is used instead.
      if kwds.pop("superfit", False):
          return super().fit(data, *args, **kwds)

      def log_mean(x):
          # Mean of the logarithms.
          return np.mean(np.log(x))

      def harm_mean(x):
          # Harmonic mean.
          return 1/np.mean(1/x)

      def get_b(c, loc, scale):
          # b expressed through c and the standardized data.
          u = (data-loc)/scale
          harm_m = harm_mean(u)
          log_m = log_mean(u)
          quot = (harm_m-1)/log_m
          return (1 - (quot-1) / (quot - (1 - 1/c)*harm_m/np.log(c)))/log_m

      def get_c(loc, scale):
          # c places the upper support endpoint at max(data).
          return (mx - loc)/scale

      def get_loc(fc, fscale):
          # Only called when `floc` is None and at least one of
          # `fscale` / `fc` is given.
          if fscale:  # (fscale and fc) or (fscale and not fc)
              loc = mn - fscale
              return loc
          if fc:
              loc = (fc*mn - mx)/(fc - 1)
              return loc

      def get_scale(loc):
          # scale places the lower support endpoint at min(data).
          return mn - loc

      # Functions used for optimisation; partial derivatives of
      # the Lagrangian, set to equal 0.

      def dL_dLoc(loc, b_=None):
          # Partial derivative wrt location.
          # Optimised upon when no parameters, or only b, are fixed.
          scale = get_scale(loc)
          c = get_c(loc, scale)
          b = get_b(c, loc, scale) if b_ is None else b_
          harm_m = harm_mean((data - loc)/scale)
          return 1 - (1 + (c - 1)/(c**(b+1) - c)) * (1 - 1/(b+1)) * harm_m

      def dL_dB(b, logc, logm):
          # Partial derivative wrt b.
          # Optimised upon whenever at least one parameter but b is fixed,
          # and b is free.
          return b - np.log1p(b*logc / (1 - b*logm)) / logc

      def fallback(data, *args, **kwargs):
          # Should any issue arise, default to the general fit method.
          return super(truncpareto_gen, self).fit(data, *args, **kwargs)

      parameters = _check_fit_input_parameters(self, data, args, kwds)
      data, fb, fc, floc, fscale = parameters
      mn, mx = data.min(), data.max()
      # Largest float strictly below min(data): the closest loc may get.
      mn_inf = np.nextafter(mn, -np.inf)

      if (fb is not None
              and fc is not None
              and floc is not None
              and fscale is not None):
          raise ValueError("All parameters fixed. "
                           "There is nothing to optimize.")
      elif fc is None and floc is None and fscale is None:
          if fb is None:
              def cond_b(loc):
                  # b is positive only if this function is positive
                  scale = get_scale(loc)
                  c = get_c(loc, scale)
                  harm_m = harm_mean((data - loc)/scale)
                  return (1 + 1/(c-1)) * np.log(c) / harm_m - 1

              # This gives an upper bound on loc allowing for a positive b.
              # Iteratively look for a bracket for root_scalar.
              rbrack = mn_inf
              i = 0
              lbrack = rbrack - 1
              while ((lbrack > -np.inf)
                     and (cond_b(lbrack)*cond_b(rbrack) >= 0)):
                  i += 1
                  lbrack = rbrack - np.power(2., i)
              if not lbrack > -np.inf:
                  return fallback(data, *args, **kwds)
              res = root_scalar(cond_b, bracket=(lbrack, rbrack))
              if not res.converged:
                  return fallback(data, *args, **kwds)

              # Determine the MLE for loc.
              # Iteratively look for a bracket for root_scalar.
              rbrack = res.root - 1e-3  # grad_loc is numerically ill-behaved
              lbrack = rbrack - 1
              i = 0
              while ((lbrack > -np.inf)
                     and (dL_dLoc(lbrack)*dL_dLoc(rbrack) >= 0)):
                  i += 1
                  lbrack = rbrack - np.power(2., i)
              if not lbrack > -np.inf:
                  return fallback(data, *args, **kwds)
              res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
              if not res.converged:
                  return fallback(data, *args, **kwds)
              loc = res.root
              scale = get_scale(loc)
              c = get_c(loc, scale)
              b = get_b(c, loc, scale)

              std_data = (data - loc)/scale
              # The expression of b relies on b being bounded above.
              up_bound_b = min(1/log_mean(std_data),
                               1/(harm_mean(std_data)-1))
              if not (b < up_bound_b):
                  return fallback(data, *args, **kwds)
          else:
              # We know b is positive (or a FitError will be triggered)
              # so we let loc get close to min(data).
              rbrack = mn_inf
              lbrack = mn_inf - 1
              i = 0
              # Iteratively look for a bracket for root_scalar.
              while (lbrack > -np.inf
                     and (dL_dLoc(lbrack, fb)
                          * dL_dLoc(rbrack, fb) >= 0)):
                  i += 1
                  # Floating-point power, as in the loops above: it
                  # saturates at inf and ends the search, whereas the
                  # Python int ``2**i`` cannot be converted to a float
                  # once ``i`` exceeds 1023.
                  lbrack = rbrack - np.power(2., i)
              if not lbrack > -np.inf:
                  return fallback(data, *args, **kwds)
              res = root_scalar(dL_dLoc, (fb,),
                                bracket=(lbrack, rbrack))
              if not res.converged:
                  return fallback(data, *args, **kwds)
              loc = res.root
              scale = get_scale(loc)
              c = get_c(loc, scale)
              b = fb
      else:
          # At least one of the parameters determining the support is fixed;
          # the others then have analytical expressions from the constraints.
          # The completely determined case (fixed c, loc and scale)
          # has to be checked for not overflowing the support.
          # If not fixed, b has to be determined numerically.
          loc = floc if floc is not None else get_loc(fc, fscale)
          scale = fscale or get_scale(loc)
          c = fc or get_c(loc, scale)

          # Unscaled, translated values should be positive when the location
          # is fixed. If it is not the case, we end up with negative `scale`
          # and `c`, which would trigger a FitError before exiting the
          # method.
          if floc is not None and data.min() - floc < 0:
              raise FitDataError("truncpareto", lower=1, upper=c)

          # Standardised values should be within the distribution support
          # when all parameters controlling it are fixed. If it is not the
          # case, `fc` is overridden by `c` determined from `floc` and
          # `fscale` when raising the exception.
          if fc and (floc is not None) and fscale:
              if data.max() > fc*fscale + floc:
                  raise FitDataError("truncpareto", lower=1,
                                     upper=get_c(loc, scale))

          # The other constraints should be automatically satisfied
          # from the analytical expressions of the parameters.
          # If fc or fscale are respectively less than one or less than 0,
          # a FitError is triggered before exiting the method.

          if fb is None:
              std_data = (data - loc)/scale
              logm = log_mean(std_data)
              logc = np.log(c)
              # Condition for a positive root to exist.
              if not (2*logm < logc):
                  return fallback(data, *args, **kwds)

              lbrack = 1/logm + 1/(logm - logc)
              rbrack = np.nextafter(1/logm, 0)
              try:
                  res = root_scalar(dL_dB, (logc, logm),
                                    bracket=(lbrack, rbrack))
                  # we should then never get there
                  if not res.converged:
                      return fallback(data, *args, **kwds)
                  b = res.root
              except ValueError:
                  # root_scalar rejected the bracket; use its right end.
                  b = rbrack
          else:
              b = fb

      # The distribution requires that `scale+loc <= data <= c*scale+loc`.
      # To avoid numerical issues, some tuning may be necessary.
      # We adjust `scale` to satisfy the lower bound, and we adjust
      # `c` to satisfy the upper bound.
      if not (scale+loc) < mn:
          if fscale:
              loc = np.nextafter(loc, -np.inf)
          else:
              scale = get_scale(loc)
              scale = np.nextafter(scale, 0)
      if not (c*scale+loc) > mx:
          c = get_c(loc, scale)
          c = np.nextafter(c, np.inf)

      if not (np.all(self._argcheck(b, c)) and (scale > 0)):
          return fallback(data, *args, **kwds)

      params_override = b, c, loc, scale
      if floc is None and fscale is None:
          # Based on testing in gh-16782, the following methods are only
          # reliable if either `floc` or `fscale` are provided. They are
          # fast, though, so might as well see if they are better than the
          # generic method.
          params_super = fallback(data, *args, **kwds)
          nllf_override = self.nnlf(params_override, data)
          nllf_super = self.nnlf(params_super, data)
          if nllf_super < nllf_override:
              return params_super

      return params_override
  # Module-level instance of the upper-truncated Pareto distribution.
  # ``a=1.0`` is presumably stored as ``self.a``, the lower support endpoint
  # returned by ``truncpareto_gen._get_support``.
  truncpareto = truncpareto_gen(a=1.0, name='truncpareto')
  # NOTE(review): looks like a symbolic description of the support (from 1.0
  # to the shape parameter ``c``) consumed elsewhere -- confirm.
  truncpareto._support = (1.0, 'c')
  class tukeylambda_gen(rv_continuous):
      r"""A Tukey-Lambda continuous random variable.

      %(before_notes)s

      Notes
      -----
      A flexible distribution, able to represent and interpolate between the
      following distributions:

      - Cauchy                (:math:`\lambda = -1`)
      - logistic              (:math:`\lambda = 0`)
      - approx Normal         (:math:`\lambda = 0.14`)
      - uniform from -1 to 1  (:math:`\lambda = 1`)

      `tukeylambda` takes a real number :math:`\lambda` (denoted ``lam``
      in the implementation) as a shape parameter.

      %(after_notes)s

      %(example)s

      """
      _support_mask = rv_continuous._open_support_mask

      def _argcheck(self, lam):
          # Any finite shape value is accepted.
          return np.isfinite(lam)

      def _shape_info(self):
          return [_ShapeInfo("lam", False, (-np.inf, np.inf), (False, False))]

      def _get_support(self, lam):
          # The support is (-1/lam, 1/lam) where lam > 0 and unbounded
          # otherwise.
          b = xpx.apply_where(lam > 0, lam,
                              lambda lam: 1/lam,
                              fill_value=np.inf)
          return -b, b

      def _pdf(self, x, lam):
          # The density is the reciprocal of F**(lam-1) + (1-F)**(lam-1),
          # with F the CDF at ``x``, and zero outside the support.
          Fx = np.asarray(sc.tklmbda(x, lam))
          Px = Fx**(lam-1.0) + (np.asarray(1-Fx))**(lam-1.0)
          with np.errstate(divide='ignore'):
              Px = 1.0/np.asarray(Px)
              # ``1/lam`` is only used where ``lam > 0``; computing it under
              # the same guard avoids a spurious divide-by-zero warning
              # when ``lam == 0``.
              half_width = 1.0/np.asarray(lam)
          in_support = (lam <= 0) | (abs(x) < half_width)
          return np.where(in_support, Px, 0.0)

      def _cdf(self, x, lam):
          return sc.tklmbda(x, lam)

      def _ppf(self, q, lam):
          return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)

      def _stats(self, lam):
          # Mean and skewness are 0; variance and kurtosis come from helpers.
          return 0, _tlvar(lam), 0, _tlkurt(lam)

      def _entropy(self, lam):
          # Numerical integral over p in (0, 1).
          def integ(p):
              return np.log(pow(p, lam-1)+pow(1-p, lam-1))
          return integrate.quad(integ, 0, 1)[0]
  # Module-level instance of the Tukey-Lambda distribution.
  tukeylambda = tukeylambda_gen(name='tukeylambda')
  class FitUniformFixedScaleDataError(FitDataError):
      """Raised by `uniform_gen.fit` when the data range exceeds ``fscale``.

      Parameters
      ----------
      ptp : float
          Peak-to-peak range of the data, reported in the message.
      fscale : float
          The fixed scale, reported in the message.
      """

      def __init__(self, ptp, fscale):
          # ``args`` must be a one-element tuple. Without the trailing
          # comma a bare str is assigned, and it gets split into a tuple of
          # single characters.
          self.args = (
              "Invalid values in `data`. Maximum likelihood estimation with "
              "the uniform distribution and fixed scale requires that "
              f"np.ptp(data) <= fscale, but np.ptp(data) = {ptp} and "
              f"fscale = {fscale}.",
          )
  class uniform_gen(rv_continuous):
      r"""A uniform continuous random variable.

      In the standard form, the distribution is uniform on ``[0, 1]``. Using
      the parameters ``loc`` and ``scale``, one obtains the uniform distribution
      on ``[loc, loc + scale]``.

      %(before_notes)s

      %(example)s

      """
      def _shape_info(self):
          # The distribution has no shape parameters.
          return []

      def _rvs(self, size=None, random_state=None):
          return random_state.uniform(0.0, 1.0, size)

      def _pdf(self, x):
          # One wherever ``x`` equals itself, i.e. zero only for NaN.
          is_self_equal = (x == x)
          return 1.0*is_self_equal

      def _cdf(self, x):
          return x

      def _ppf(self, q):
          return q

      def _stats(self):
          mean, variance, skewness, kurtosis = 0.5, 1.0/12, 0, -1.2
          return mean, variance, skewness, kurtosis

      def _entropy(self):
          return 0.0

      @_call_super_mom
      def fit(self, data, *args, **kwds):
          """
          Maximum likelihood estimate for the location and scale parameters.

          `uniform.fit` uses only the following parameters. Because exact
          formulas are used, the parameters related to optimization that are
          available in the `fit` method of other distributions are ignored
          here. The only positional argument accepted is `data`.

          Parameters
          ----------
          data : array_like
              Data to use in calculating the maximum likelihood estimate.
          floc : float, optional
              Hold the location parameter fixed to the specified value.
          fscale : float, optional
              Hold the scale parameter fixed to the specified value.

          Returns
          -------
          loc, scale : float
              Maximum likelihood estimates for the location and scale.

          Notes
          -----
          An error is raised if `floc` is given and any values in `data` are
          less than `floc`, or if `fscale` is given and `fscale` is less
          than ``data.max() - data.min()``. An error is also raised if both
          `floc` and `fscale` are given.

          Examples
          --------
          >>> import numpy as np
          >>> from scipy.stats import uniform

          We'll fit the uniform distribution to `x`:

          >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])

          For a uniform distribution MLE, the location is the minimum of the
          data, and the scale is the maximum minus the minimum.

          >>> loc, scale = uniform.fit(x)
          >>> loc
          2.0
          >>> scale
          11.0

          If we know the data comes from a uniform distribution where the support
          starts at 0, we can use ``floc=0``:

          >>> loc, scale = uniform.fit(x, floc=0)
          >>> loc
          0.0
          >>> scale
          13.0

          Alternatively, if we know the length of the support is 12, we can use
          ``fscale=12``:

          >>> loc, scale = uniform.fit(x, fscale=12)
          >>> loc
          1.5
          >>> scale
          12.0

          In that last example, the support interval is [1.5, 13.5]. This
          solution is not unique. For example, the distribution with ``loc=2``
          and ``scale=12`` has the same likelihood as the one above. When
          `fscale` is given and it is larger than ``data.max() - data.min()``,
          the parameters returned by the `fit` method center the support over
          the interval ``[data.min(), data.max()]``.

          """
          if args:
              raise TypeError("Too many arguments.")

          floc = kwds.pop('floc', None)
          fscale = kwds.pop('fscale', None)

          _remove_optimizer_parameters(kwds)

          loc_is_fixed = floc is not None
          scale_is_fixed = fscale is not None
          if loc_is_fixed and scale_is_fixed:
              # Same check as in `rv_continuous.fit`, kept for consistency.
              raise ValueError("All parameters fixed. There is nothing to "
                               "optimize.")

          data = np.asarray(data)

          if not np.isfinite(data).all():
              raise ValueError("The data contains non-finite values.")

          # Maximum likelihood for the uniform distribution
          # -----------------------------------------------
          # With density 1/scale on [loc, loc + scale] and zero elsewhere,
          # the log-likelihood of n observations that all lie in the support
          # is
          #     l(x, loc, scale) = -n*log(scale)
          # so the estimate is the smallest admissible scale.
          #
          # * Nothing fixed: loc = x.min(), scale = np.ptp(x).
          # * loc fixed (must not exceed x.min()): scale = x.max() - loc.
          # * scale fixed (must not be below np.ptp(x)): every loc with
          #       x.max() - scale <= loc <= x.min()
          #   has the same likelihood. The support is centered over
          #   [x.min(), x.max()]:
          #       loc = x.min() - 0.5*(scale - np.ptp(x))

          if scale_is_fixed:
              # loc is not fixed, scale is fixed.
              ptp = np.ptp(data)
              if ptp > fscale:
                  raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
              loc = data.min() - 0.5*(fscale - ptp)
              scale = fscale
          elif loc_is_fixed:
              # loc is fixed, scale is not fixed.
              loc = floc
              scale = data.max() - loc
              if data.min() < loc:
                  raise FitDataError("uniform", lower=loc, upper=loc + scale)
          else:
              # Neither loc nor scale is fixed.
              loc = data.min()
              scale = np.ptp(data)

          # Callers expect floating point results; convert explicitly.
          return float(loc), float(scale)
  # Module-level instance of the uniform distribution; ``a=0.0`` and ``b=1.0``
  # match the standard-form support ``[0, 1]`` described in the class docstring.
  uniform = uniform_gen(a=0.0, b=1.0, name='uniform')
  class vonmises_gen(rv_continuous):
      r"""A Von Mises continuous random variable.

      %(before_notes)s

      See Also
      --------
      scipy.stats.vonmises_fisher : Von-Mises Fisher distribution on a
                                    hypersphere

      Notes
      -----
      The probability density function for `vonmises` and `vonmises_line` is:

      .. math::

          f(x, \kappa) = \frac{ \exp(\kappa \cos(x)) }{ 2 \pi I_0(\kappa) }

      for :math:`-\pi \le x \le \pi`, :math:`\kappa \ge 0`. :math:`I_0` is the
      modified Bessel function of order zero (`scipy.special.i0`).

      `vonmises` is a circular distribution which does not restrict the
      distribution to a fixed interval. Currently, there is no circular
      distribution framework in SciPy. The ``cdf`` is implemented such that
      ``cdf(x + 2*np.pi) == cdf(x) + 1``.

      `vonmises_line` is the same distribution, defined on :math:`[-\pi, \pi]`
      on the real line. This is a regular (i.e. non-circular) distribution.

      Note about distribution parameters: `vonmises` and `vonmises_line` take
      ``kappa`` as a shape parameter (concentration) and ``loc`` as the location
      (circular mean). A ``scale`` parameter is accepted but does not have any
      effect.

      Examples
      --------
      Import the necessary modules.

      >>> import numpy as np
      >>> import matplotlib.pyplot as plt
      >>> from scipy.stats import vonmises

      Define distribution parameters.

      >>> loc = 0.5 * np.pi  # circular mean
      >>> kappa = 1  # concentration

      Compute the probability density at ``x=0`` via the ``pdf`` method.

      >>> vonmises.pdf(0, loc=loc, kappa=kappa)
      0.12570826359722018

      Verify that the percentile function ``ppf`` inverts the cumulative
      distribution function ``cdf`` up to floating point accuracy.

      >>> x = 1
      >>> cdf_value = vonmises.cdf(x, loc=loc, kappa=kappa)
      >>> ppf_value = vonmises.ppf(cdf_value, loc=loc, kappa=kappa)
      >>> x, cdf_value, ppf_value
      (1, 0.31489339900904967, 1.0000000000000004)

      Draw 1000 random variates by calling the ``rvs`` method.

      >>> sample_size = 1000
      >>> sample = vonmises(loc=loc, kappa=kappa).rvs(sample_size)

      Plot the von Mises density on a Cartesian and polar grid to emphasize
      that it is a circular distribution.

      >>> fig = plt.figure(figsize=(12, 6))
      >>> left = plt.subplot(121)
      >>> right = plt.subplot(122, projection='polar')
      >>> x = np.linspace(-np.pi, np.pi, 500)
      >>> vonmises_pdf = vonmises.pdf(x, loc=loc, kappa=kappa)
      >>> ticks = [0, 0.15, 0.3]

      The left image contains the Cartesian plot.

      >>> left.plot(x, vonmises_pdf)
      >>> left.set_yticks(ticks)
      >>> number_of_bins = int(np.sqrt(sample_size))
      >>> left.hist(sample, density=True, bins=number_of_bins)
      >>> left.set_title("Cartesian plot")
      >>> left.set_xlim(-np.pi, np.pi)
      >>> left.grid(True)

      The right image contains the polar plot.

      >>> right.plot(x, vonmises_pdf, label="PDF")
      >>> right.set_yticks(ticks)
      >>> right.hist(sample, density=True, bins=number_of_bins,
      ...            label="Histogram")
      >>> right.set_title("Polar plot")
      >>> right.legend(bbox_to_anchor=(0.15, 1.06))

      """
      def _shape_info(self):
          return [_ShapeInfo("kappa", False, (0, np.inf), (True, False))]

      def _argcheck(self, kappa):
          # The concentration must be non-negative.
          return kappa >= 0

      def _rvs(self, kappa, size=None, random_state=None):
          return random_state.vonmises(0.0, kappa, size=size)

      @inherit_docstring_from(rv_continuous)
      def rvs(self, *args, **kwds):
          # Wrap the variates produced by the parent class into [-pi, pi).
          sample = super().rvs(*args, **kwds)
          return np.mod(sample + np.pi, 2*np.pi) - np.pi

      def _pdf(self, x, kappa):
          # exp(kappa*cos(x)) / (2*pi*I0(kappa)), with numerator and
          # denominator both scaled by exp(-kappa):
          #     exp(kappa*cosm1(x)) / (2*pi*i0e(kappa))
          numerator = np.exp(kappa*sc.cosm1(x))
          denominator = 2*np.pi*sc.i0e(kappa)
          return numerator / denominator

      def _logpdf(self, x, kappa):
          # Logarithm of exp(kappa*cosm1(x)) / (2*pi*i0e(kappa)).
          return kappa * sc.cosm1(x) - np.log(2*np.pi) - np.log(sc.i0e(kappa))

      def _cdf(self, x, kappa):
          return _stats.von_mises_cdf(kappa, x)

      def _stats_skip(self, kappa):
          return 0, None, 0, None

      def _entropy(self, kappa):
          # -kappa*I1(kappa)/I0(kappa) + log(2*pi*I0(kappa)), rewritten with
          # the exponentially scaled Bessel functions i0e and i1e:
          #     -kappa*i1e/i0e + log(2*pi*i0e) + kappa
          bessel_ratio = -kappa * sc.i1e(kappa) / sc.i0e(kappa)
          return (bessel_ratio +
                  np.log(2 * np.pi * sc.i0e(kappa)) + kappa)

      @extend_notes_in_docstring(rv_continuous, notes="""\
          The default limits of integration are endpoints of the interval
          of width ``2*pi`` centered at `loc` (e.g. ``[-pi, pi]`` when
          ``loc=0``).\n\n""")
      def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
                 conditional=False, **kwds):
          # Default to the interval of width 2*pi centered at `loc`.
          lower = loc + -np.pi if lb is None else lb
          upper = loc + np.pi if ub is None else ub
          return super().expect(func, args, loc,
                                scale, lower, upper, conditional, **kwds)

      @_call_super_mom
      @extend_notes_in_docstring(rv_continuous, notes="""\
          Fit data is assumed to represent angles and will be wrapped onto the
          unit circle. `f0` and `fscale` are ignored; the returned shape is
          always the maximum likelihood estimate and the scale is always
          1. Initial guesses are ignored.\n\n""")
      def fit(self, data, *args, **kwds):
          if kwds.pop('superfit', False):
              return super().fit(data, *args, **kwds)

          data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
                                                                   args, kwds)
          if self.a == -np.pi:
              # vonmises line case, here the default fit method will be used
              return super().fit(data, *args, **kwds)

          # wrap data to interval [0, 2*pi]
          data = np.mod(data, 2 * np.pi)

          def find_kappa(data, loc):
              # Sources usually give r = I[1](kappa)/I[0](kappa), with r the
              # mean resultant length, as the equation for the MLE of kappa.
              # That holds when loc is the MLE of the location. For a
              # location that may be fixed arbitrarily, r is defined as
              # below; see gh-18128 for more information.
              r = np.sum(np.cos(loc - data))/len(data)

              # r[0](kappa) := I[1](kappa)/I[0](kappa) increases
              # monotonically from r[0](0) = 0 to r[0](+inf) = 1, and the
              # partial derivative of the log likelihood with respect to
              # kappa decreases monotonically in kappa.
              if r == 1:
                  # All observations are (almost) equal to the mean. Return
                  # some large kappa such that r[0](kappa) = 1.0 numerically.
                  return 1e16
              if not (r > 0):
                  # If the provided floc is very far from the circular mean,
                  # r can become negative, and I[1](kappa)/I[0](kappa) = r
                  # then has no solution. The maximum likelihood kappa is 0,
                  # practically the uniform distribution on the circle. As
                  # vonmises is defined for kappa > 0, return instead the
                  # smallest floating point value.
                  # See gh-18190 for more information
                  return np.finfo(float).tiny

              def solve_for_kappa(kappa):
                  return sc.i1e(kappa)/sc.i0e(kappa) - r

              # The bounds of the root of r[0](kappa) = r are derived from
              # selected bounds of r[0](x) given in [1, Eq. 11 & 16]. See
              # gh-20102 for details.
              #
              # [1] Amos, D. E. (1973). Computation of Modified Bessel
              #     Functions and Their Ratios. Mathematics of Computation,
              #     28(125): 239-251.
              lower_bound = r/(1-r)/(1+r)
              upper_bound = 2*lower_bound

              # The bounds are violated numerically for certain values of r,
              # where solve_for_kappa evaluated at the bounds have the same
              # sign. This indicates numerical imprecision of i1e()/i0e().
              # Return the violated bound in this case as it's more accurate.
              if solve_for_kappa(lower_bound) >= 0:
                  return lower_bound
              if solve_for_kappa(upper_bound) <= 0:
                  return upper_bound
              root_res = root_scalar(solve_for_kappa, method="brentq",
                                     bracket=(lower_bound, upper_bound))
              return root_res.root

          # location likelihood equation has a solution independent of kappa
          if floc is not None:
              loc = floc
          else:
              loc = stats.circmean(data)
          # shape likelihood equation depends on location
          if fshape is not None:
              shape = fshape
          else:
              shape = find_kappa(data, loc)

          loc = np.mod(loc + np.pi, 2 * np.pi) - np.pi  # ensure in [-pi, pi]
          return shape, loc, 1  # scale is not handled
  # Module-level instances. `vonmises_line` passes ``a=-np.pi``, which is the
  # value `vonmises_gen.fit` tests (``self.a == -np.pi``) to select the
  # generic fit for the "line" variant.
  vonmises = vonmises_gen(name='vonmises')
  vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line')
  class wald_gen(invgauss_gen):
      r"""A Wald continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `wald` is:

      .. math::

          f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp(- \frac{ (x-1)^2 }{ 2x })

      for :math:`x >= 0`.

      `wald` is a special case of `invgauss` with ``mu=1``.

      %(after_notes)s

      %(example)s
      """
      _support_mask = rv_continuous._open_support_mask

      def _shape_info(self):
          # No shape parameters.
          return []

      def _rvs(self, size=None, random_state=None):
          mean, scale = 1.0, 1.0
          return random_state.wald(mean, scale, size=size)

      # Every method below forwards to the module-level `invgauss` instance
      # with its shape fixed at ``mu = 1.0``.

      def _pdf(self, x):
          # wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
          mu = 1.0
          return invgauss._pdf(x, mu)

      def _cdf(self, x):
          mu = 1.0
          return invgauss._cdf(x, mu)

      def _sf(self, x):
          mu = 1.0
          return invgauss._sf(x, mu)

      def _ppf(self, x):
          mu = 1.0
          return invgauss._ppf(x, mu)

      def _isf(self, x):
          mu = 1.0
          return invgauss._isf(x, mu)

      def _logpdf(self, x):
          mu = 1.0
          return invgauss._logpdf(x, mu)

      def _logcdf(self, x):
          mu = 1.0
          return invgauss._logcdf(x, mu)

      def _logsf(self, x):
          mu = 1.0
          return invgauss._logsf(x, mu)

      def _stats(self):
          mean, variance, skewness, kurtosis = 1.0, 1.0, 3.0, 15.0
          return mean, variance, skewness, kurtosis

      def _entropy(self):
          mu = 1.0
          return invgauss._entropy(mu)
  # Module-level instance of the Wald distribution, created with ``a=0.0``.
  wald = wald_gen(a=0.0, name="wald")
  class wrapcauchy_gen(rv_continuous):
      r"""A wrapped Cauchy continuous random variable.

      %(before_notes)s

      Notes
      -----
      The probability density function for `wrapcauchy` is:

      .. math::

          f(x, c) = \frac{1-c^2}{2\pi (1+c^2 - 2c \cos(x))}

      for :math:`0 \le x \le 2\pi`, :math:`0 < c < 1`.

      `wrapcauchy` takes ``c`` as a shape parameter for :math:`c`.

      %(after_notes)s

      %(example)s

      """
      def _argcheck(self, c):
          # c must lie strictly between 0 and 1.
          is_positive = (c > 0)
          below_one = (c < 1)
          return is_positive & below_one

      def _shape_info(self):
          return [_ShapeInfo("c", False, (0, 1), (False, False))]

      def _pdf(self, x, c):
          # wrapcauchy.pdf(x, c) = (1-c**2) / (2*pi*(1+c**2-2*c*cos(x)))
          numerator = 1.0-c*c
          denominator = 2*np.pi*(1+c*c-2*c*np.cos(x))
          return numerator/denominator

      def _cdf(self, x, c):

          def lower_half(x, cr):
              # CDF for 0 <= x < pi
              return 1/np.pi * np.arctan(cr*np.tan(x/2))

          def upper_half(x, cr):
              # CDF for pi <= x <= 2*pi
              return 1 - 1/np.pi * np.arctan(cr*np.tan((2*np.pi - x)/2))

          cr = (1 + c)/(1 - c)
          below_pi = x < np.pi
          return xpx.apply_where(below_pi, (x, cr), lower_half, upper_half)

      def _ppf(self, q, c):
          ratio = (1.0-c)/(1.0+c)
          # Candidate for q below one half ...
          from_left = 2*np.arctan(ratio*np.tan(np.pi*q))
          # ... and for the remaining q, measured back from 2*pi.
          from_right = 2*np.pi-2*np.arctan(ratio*np.tan(np.pi*(1-q)))
          return np.where(q < 1.0/2, from_left, from_right)

      def _entropy(self, c):
          return np.log(2*np.pi*(1-c*c))

      def _fitstart(self, data):
          # Starting guess: 0.5 for the shape, the data minimum for the
          # location and peak-to-peak/(2*pi) for the scale.
          sample = data._uncensor() if isinstance(data, CensoredData) else data
          return 0.5, np.min(sample), np.ptp(sample)/(2*np.pi)

      @inherit_docstring_from(rv_continuous)
      def rvs(self, *args, **kwds):
          # Reduce the variates from the parent class modulo 2*pi.
          sample = super().rvs(*args, **kwds)
          return np.mod(sample, 2*np.pi)
  # Module-level instance; ``a=0.0`` and ``b=2*np.pi`` match the interval
  # ``0 <= x <= 2*pi`` stated in the class docstring.
  wrapcauchy = wrapcauchy_gen(a=0.0, b=2*np.pi, name='wrapcauchy')
  8514. class gennorm_gen(rv_continuous):
  8515. r"""A generalized normal continuous random variable.
  8516. %(before_notes)s
  8517. See Also
  8518. --------
  8519. laplace : Laplace distribution
  8520. norm : normal distribution
  8521. Notes
  8522. -----
  8523. The probability density function for `gennorm` is [1]_:
  8524. .. math::
  8525. f(x, \beta) = \frac{\beta}{2 \Gamma(1/\beta)} \exp(-|x|^\beta),
  8526. where :math:`x` is a real number, :math:`\beta > 0` and
  8527. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  8528. `gennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  8529. For :math:`\beta = 1`, it is identical to a Laplace distribution.
  8530. For :math:`\beta = 2`, it is identical to a normal distribution
  8531. (with ``scale=1/sqrt(2)``).
  8532. References
  8533. ----------
  8534. .. [1] "Generalized normal distribution, Version 1",
  8535. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  8536. .. [2] Nardon, Martina, and Paolo Pianca. "Simulation techniques for
  8537. generalized Gaussian densities." Journal of Statistical
  8538. Computation and Simulation 79.11 (2009): 1317-1329
  8539. .. [3] Wicklin, Rick. "Simulate data from a generalized Gaussian
  8540. distribution" in The DO Loop blog, September 21, 2016,
  8541. https://blogs.sas.com/content/iml/2016/09/21/simulate-generalized-gaussian-sas.html
  8542. %(example)s
  8543. """
  8544. def _shape_info(self):
  8545. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  8546. def _pdf(self, x, beta):
  8547. return np.exp(self._logpdf(x, beta))
  8548. def _logpdf(self, x, beta):
  8549. return np.log(0.5*beta) - sc.gammaln(1.0/beta) - abs(x)**beta
  8550. def _cdf(self, x, beta):
  8551. c = 0.5 * np.sign(x)
  8552. # evaluating (.5 + c) first prevents numerical cancellation
  8553. return (0.5 + c) - c * sc.gammaincc(1.0/beta, abs(x)**beta)
  8554. def _ppf(self, x, beta):
  8555. c = np.sign(x - 0.5)
  8556. # evaluating (1. + c) first prevents numerical cancellation
  8557. return c * sc.gammainccinv(1.0/beta, (1.0 + c) - 2.0*c*x)**(1.0/beta)
  8558. def _sf(self, x, beta):
  8559. return self._cdf(-x, beta)
  8560. def _isf(self, x, beta):
  8561. return -self._ppf(x, beta)
  8562. def _munp(self, n, beta):
  8563. if n == 0:
  8564. return 1.
  8565. if n % 2 == 0:
  8566. c1, cn = sc.gammaln([1.0/beta, (n + 1.0)/beta])
  8567. return np.exp(cn - c1)
  8568. else:
  8569. return 0.
  8570. def _stats(self, beta):
  8571. c1, c3, c5 = sc.gammaln([1.0/beta, 3.0/beta, 5.0/beta])
  8572. return 0., np.exp(c3 - c1), 0., np.exp(c5 + c1 - 2.0*c3) - 3.
  8573. def _entropy(self, beta):
  8574. return 1. / beta - np.log(.5 * beta) + sc.gammaln(1. / beta)
  8575. def _rvs(self, beta, size=None, random_state=None):
  8576. # see [2]_ for the algorithm
  8577. # see [3]_ for reference implementation in SAS
  8578. z = random_state.gamma(1/beta, size=size)
  8579. y = z ** (1/beta)
  8580. # convert y to array to ensure masking support
  8581. y = np.asarray(y)
  8582. mask = random_state.random(size=y.shape) < 0.5
  8583. y[mask] = -y[mask]
  8584. return y
  8585. gennorm = gennorm_gen(name='gennorm')
  8586. class halfgennorm_gen(rv_continuous):
  8587. r"""The upper half of a generalized normal continuous random variable.
  8588. %(before_notes)s
  8589. See Also
  8590. --------
  8591. gennorm : generalized normal distribution
  8592. expon : exponential distribution
  8593. halfnorm : half normal distribution
  8594. Notes
  8595. -----
  8596. The probability density function for `halfgennorm` is:
  8597. .. math::
  8598. f(x, \beta) = \frac{\beta}{\Gamma(1/\beta)} \exp(-|x|^\beta)
  8599. for :math:`x, \beta > 0`. :math:`\Gamma` is the gamma function
  8600. (`scipy.special.gamma`).
  8601. `halfgennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  8602. For :math:`\beta = 1`, it is identical to an exponential distribution.
  8603. For :math:`\beta = 2`, it is identical to a half normal distribution
  8604. (with ``scale=1/sqrt(2)``).
  8605. References
  8606. ----------
  8607. .. [1] "Generalized normal distribution, Version 1",
  8608. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  8609. %(example)s
  8610. """
  8611. def _shape_info(self):
  8612. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  8613. def _pdf(self, x, beta):
  8614. # beta
  8615. # halfgennorm.pdf(x, beta) = ------------- exp(-|x|**beta)
  8616. # gamma(1/beta)
  8617. return np.exp(self._logpdf(x, beta))
  8618. def _logpdf(self, x, beta):
  8619. return np.log(beta) - sc.gammaln(1.0/beta) - x**beta
  8620. def _cdf(self, x, beta):
  8621. return sc.gammainc(1.0/beta, x**beta)
  8622. def _ppf(self, x, beta):
  8623. return sc.gammaincinv(1.0/beta, x)**(1.0/beta)
  8624. def _sf(self, x, beta):
  8625. return sc.gammaincc(1.0/beta, x**beta)
  8626. def _isf(self, x, beta):
  8627. return sc.gammainccinv(1.0/beta, x)**(1.0/beta)
  8628. def _entropy(self, beta):
  8629. return 1.0/beta - np.log(beta) + sc.gammaln(1.0/beta)
  8630. halfgennorm = halfgennorm_gen(a=0, name='halfgennorm')
  8631. class crystalball_gen(rv_continuous):
  8632. r"""
  8633. Crystalball distribution
  8634. %(before_notes)s
  8635. Notes
  8636. -----
  8637. The probability density function for `crystalball` is:
  8638. .. math::
  8639. f(x, \beta, m) = \begin{cases}
  8640. N \exp(-x^2 / 2), &\text{for } x > -\beta\\
  8641. N A (B - x)^{-m} &\text{for } x \le -\beta
  8642. \end{cases}
  8643. where :math:`A = (m / |\beta|)^m \exp(-\beta^2 / 2)`,
  8644. :math:`B = m/|\beta| - |\beta|` and :math:`N` is a normalisation constant.
  8645. `crystalball` takes :math:`\beta > 0` and :math:`m > 1` as shape
  8646. parameters. :math:`\beta` defines the point where the pdf changes
  8647. from a power-law to a Gaussian distribution. :math:`m` is the power
  8648. of the power-law tail.
  8649. %(after_notes)s
  8650. .. versionadded:: 0.19.0
  8651. References
  8652. ----------
  8653. .. [1] "Crystal Ball Function",
  8654. https://en.wikipedia.org/wiki/Crystal_Ball_function
  8655. %(example)s
  8656. """
  8657. def _argcheck(self, beta, m):
  8658. """
  8659. Shape parameter bounds are m > 1 and beta > 0.
  8660. """
  8661. return (m > 1) & (beta > 0)
  8662. def _shape_info(self):
  8663. ibeta = _ShapeInfo("beta", False, (0, np.inf), (False, False))
  8664. im = _ShapeInfo("m", False, (1, np.inf), (False, False))
  8665. return [ibeta, im]
  8666. def _fitstart(self, data):
  8667. # Arbitrary, but the default m=1 is not valid
  8668. return super()._fitstart(data, args=(1, 1.5))
  8669. def _pdf(self, x, beta, m):
  8670. """
  8671. Return PDF of the crystalball function.
  8672. --
  8673. | exp(-x**2 / 2), for x > -beta
  8674. crystalball.pdf(x, beta, m) = N * |
  8675. | A * (B - x)**(-m), for x <= -beta
  8676. --
  8677. """
  8678. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8679. _norm_pdf_C * _norm_cdf(beta))
  8680. def rhs(x, beta, m):
  8681. return np.exp(-x**2 / 2)
  8682. def lhs(x, beta, m):
  8683. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  8684. (m/beta - beta - x)**(-m))
  8685. return N * xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8686. def _logpdf(self, x, beta, m):
  8687. """
  8688. Return the log of the PDF of the crystalball function.
  8689. """
  8690. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8691. _norm_pdf_C * _norm_cdf(beta))
  8692. def rhs(x, beta, m):
  8693. return -x**2/2
  8694. def lhs(x, beta, m):
  8695. return m*np.log(m/beta) - beta**2/2 - m*np.log(m/beta - beta - x)
  8696. return np.log(N) + xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8697. def _cdf(self, x, beta, m):
  8698. """
  8699. Return CDF of the crystalball function
  8700. """
  8701. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8702. _norm_pdf_C * _norm_cdf(beta))
  8703. def rhs(x, beta, m):
  8704. return ((m/beta) * np.exp(-beta**2 / 2.0) / (m-1) +
  8705. _norm_pdf_C * (_norm_cdf(x) - _norm_cdf(-beta)))
  8706. def lhs(x, beta, m):
  8707. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  8708. (m/beta - beta - x)**(-m+1) / (m-1))
  8709. return N * xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8710. def _sf(self, x, beta, m):
  8711. """
  8712. Survival function of the crystalball distribution.
  8713. """
  8714. def rhs(x, beta, m):
  8715. # M is the same as 1/N used elsewhere.
  8716. M = m/beta/(m - 1)*np.exp(-beta**2/2) + _norm_pdf_C*_norm_cdf(beta)
  8717. return _norm_pdf_C*_norm_sf(x)/M
  8718. def lhs(x, beta, m):
  8719. # Default behavior is OK in the left tail of the SF.
  8720. return 1 - self._cdf(x, beta, m)
  8721. return xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8722. def _ppf(self, p, beta, m):
  8723. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8724. _norm_pdf_C * _norm_cdf(beta))
  8725. pbeta = N * (m/beta) * np.exp(-beta**2/2) / (m - 1)
  8726. def ppf_less(p, beta, m):
  8727. eb2 = np.exp(-beta**2/2)
  8728. C = (m/beta) * eb2 / (m-1)
  8729. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  8730. return (m/beta - beta -
  8731. ((m - 1)*(m/beta)**(-m)/eb2*p/N)**(1/(1-m)))
  8732. def ppf_greater(p, beta, m):
  8733. eb2 = np.exp(-beta**2/2)
  8734. C = (m/beta) * eb2 / (m-1)
  8735. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  8736. return _norm_ppf(_norm_cdf(-beta) + (1/_norm_pdf_C)*(p/N - C))
  8737. return xpx.apply_where(p < pbeta, (p, beta, m), ppf_less, ppf_greater)
  8738. def _munp(self, n, beta, m):
  8739. """
  8740. Returns the n-th non-central moment of the crystalball function.
  8741. """
  8742. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8743. _norm_pdf_C * _norm_cdf(beta))
  8744. def n_th_moment(n, beta, m):
  8745. """
  8746. Returns n-th moment. Defined only if n+1 < m
  8747. Function cannot broadcast due to the loop over n
  8748. """
  8749. A = (m/beta)**m * np.exp(-beta**2 / 2.0)
  8750. B = m/beta - beta
  8751. rhs = (2**((n-1)/2.0) * sc.gamma((n+1)/2) *
  8752. (1.0 + (-1)**n * sc.gammainc((n+1)/2, beta**2 / 2)))
  8753. lhs = np.zeros(rhs.shape)
  8754. for k in range(int(n) + 1):
  8755. lhs += (sc.binom(n, k) * B**(n-k) * (-1)**k / (m - k - 1) *
  8756. (m/beta)**(-m + k + 1))
  8757. return A * lhs + rhs
  8758. return N * xpx.apply_where(n + 1 < m, (n, beta, m),
  8759. np.vectorize(n_th_moment, otypes=[np.float64]),
  8760. fill_value=np.inf)
  8761. crystalball = crystalball_gen(name='crystalball', longname="A Crystalball Function")
  8762. def _argus_phi(chi):
  8763. """
  8764. Utility function for the argus distribution used in the pdf, sf and
  8765. moment calculation.
  8766. Note that for all x > 0:
  8767. gammainc(1.5, x**2/2) = 2 * (_norm_cdf(x) - x * _norm_pdf(x) - 0.5).
  8768. This can be verified directly by noting that the cdf of Gamma(1.5) can
  8769. be written as erf(sqrt(x)) - 2*sqrt(x)*exp(-x)/sqrt(Pi).
  8770. We use gammainc instead of the usual definition because it is more precise
  8771. for small chi.
  8772. """
  8773. return sc.gammainc(1.5, chi**2/2) / 2
class argus_gen(rv_continuous):
    r"""
    Argus distribution

    %(before_notes)s

    Notes
    -----
    The probability density function for `argus` is:

    .. math::

        f(x, \chi) = \frac{\chi^3}{\sqrt{2\pi} \Psi(\chi)} x \sqrt{1-x^2}
                     \exp(-\chi^2 (1 - x^2)/2)

    for :math:`0 < x < 1` and :math:`\chi > 0`, where

    .. math::

        \Psi(\chi) = \Phi(\chi) - \chi \phi(\chi) - 1/2

    with :math:`\Phi` and :math:`\phi` being the CDF and PDF of a standard
    normal distribution, respectively.

    `argus` takes :math:`\chi` as a shape parameter. Details about sampling
    from the ARGUS distribution can be found in [2]_.

    %(after_notes)s

    References
    ----------
    .. [1] "ARGUS distribution",
           https://en.wikipedia.org/wiki/ARGUS_distribution
    .. [2] Christoph Baumgarten "Random variate generation by fast numerical
           inversion in the varying parameter case." Research in Statistics,
           vol. 1, 2023. :doi:`10.1080/27684520.2023.2279060`

    .. versionadded:: 0.19.0

    %(example)s
    """
    def _shape_info(self):
        return [_ShapeInfo("chi", False, (0, np.inf), (False, False))]

    def _logpdf(self, x, chi):
        # for x = 0 or 1, logpdf returns -np.inf
        # (divide-by-zero warnings from log(0) are silenced for that reason)
        with np.errstate(divide='ignore'):
            y = 1.0 - x*x
            # A collects the x-independent terms of the log-density
            A = 3*np.log(chi) - _norm_pdf_logC - np.log(_argus_phi(chi))
            return A + np.log(x) + 0.5*np.log1p(-x*x) - chi**2 * y / 2

    def _pdf(self, x, chi):
        # The density is obtained by exponentiating the log-density.
        return np.exp(self._logpdf(x, chi))

    def _cdf(self, x, chi):
        # CDF as the complement of the survival function.
        return 1.0 - self._sf(x, chi)

    def _sf(self, x, chi):
        # Ratio of _argus_phi at chi*sqrt(1 - x**2) to _argus_phi at chi;
        # 1 - x**2 is written in factored form as (1 - x)*(1 + x).
        return _argus_phi(chi * np.sqrt((1 - x)*(1 + x))) / _argus_phi(chi)

    def _rvs(self, chi, size=None, random_state=None):
        # Dispatch to the scalar sampler: directly when chi holds a single
        # value, otherwise once per element of chi.
        chi = np.asarray(chi)
        if chi.size == 1:
            out = self._rvs_scalar(chi, numsamples=size,
                                   random_state=random_state)
        else:
            # NOTE(review): presumably `shp` is the shape of the samples
            # drawn per chi value and `bc` flags the output axes along
            # which chi is broadcast -- confirm against _check_shape.
            shp, bc = _check_shape(chi.shape, size)
            numsamples = int(np.prod(shp))
            out = np.empty(size)
            it = np.nditer([chi],
                           flags=['multi_index'],
                           op_flags=[['readonly']])
            while not it.finished:
                # full slice on axes flagged in bc, the iterator's index on
                # the remaining axes
                idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
                            for j in range(-len(size), 0))
                r = self._rvs_scalar(it[0], numsamples=numsamples,
                                     random_state=random_state)
                out[idx] = r.reshape(shp)
                it.iternext()

        if size == ():
            # unwrap a 0-d result
            out = out[()]
        return out

    def _rvs_scalar(self, chi, numsamples=None, random_state=None):
        # Draw `numsamples` variates for a single value of chi.
        #
        # if chi <= 1.8:
        # use rejection method, see Devroye:
        # Non-Uniform Random Variate Generation, 1986, section II.3.2.
        # write: PDF f(x) = c * g(x) * h(x), where
        # h is [0,1]-valued and g is a density
        # we use two ways to write f
        #
        # Case 1:
        # write g(x) = 3*x*sqrt(1-x**2), h(x) = exp(-chi**2 (1-x**2) / 2)
        # If X has a distribution with density g its ppf G_inv is given by:
        # G_inv(u) = np.sqrt(1 - u**(2/3))
        #
        # Case 2:
        # g(x) = chi**2 * x * exp(-chi**2 * (1-x**2)/2) / (1 - exp(-chi**2 /2))
        # h(x) = sqrt(1 - x**2), 0 <= x <= 1
        # one can show that
        # G_inv(u) = np.sqrt(2*np.log(u*(np.exp(chi**2/2)-1)+1))/chi
        #          = np.sqrt(1 + 2*np.log(np.exp(-chi**2/2)*(1-u)+u)/chi**2)
        # the latter expression is used for precision with small chi
        #
        # In both cases, the inverse cdf of g can be written analytically, and
        # we can apply the rejection method:
        #
        # REPEAT
        #    Generate U uniformly distributed on [0, 1]
        #    Generate X with density g (e.g. via inverse transform sampling:
        #    X = G_inv(V) with V uniformly distributed on [0, 1])
        # UNTIL U <= h(X)
        # RETURN X
        #
        # We use case 1 for chi <= 0.5 as it maintains precision for small chi
        # and case 2 for 0.5 < chi <= 1.8 due to its speed for moderate chi.
        #
        # if chi > 1.8:
        # use relation to the Gamma distribution: if X is ARGUS with parameter
        # chi, then Y = chi**2 * (1 - X**2) / 2 has density proportional to
        # sqrt(u) * exp(-u) on [0, chi**2 / 2], i.e. a Gamma(3/2) distribution
        # conditioned on [0, chi**2 / 2]. Therefore, to sample X from the
        # ARGUS distribution, we sample Y from the gamma distribution, keeping
        # only samples on [0, chi**2 / 2], and apply the inverse
        # transformation X = (1 - 2*Y/chi**2)**(1/2). Since we only
        # look at chi > 1.8, gamma(1.5).cdf(chi**2/2) is large enough such
        # that Y falls in the interval [0, chi**2 / 2] with a high
        # probability:
        # stats.gamma(1.5).cdf(1.8**2/2) = 0.644...
        #
        # The points to switch between the different methods are determined
        # by a comparison of the runtime of the different methods. However,
        # the runtime is platform-dependent. The implemented values should
        # ensure a good overall performance and are supported by an analysis
        # of the rejection constants of different methods.

        size1d = tuple(np.atleast_1d(numsamples))
        N = int(np.prod(size1d))
        # flat buffer of accepted samples; `simulated` counts how many
        # entries have been filled so far
        x = np.zeros(N)
        simulated = 0
        chi2 = chi * chi
        if chi <= 0.5:
            d = -chi2 / 2
            while simulated < N:
                # only request as many candidates as are still missing
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = v**(2/3)
                # acceptance condition: u <= h(G_inv(v)). This simplifies to
                accept = (np.log(u) <= d * z)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    # we still need to transform z=v**(2/3) to X = G_inv(v)
                    rvs = np.sqrt(1 - z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        elif chi <= 1.8:
            echi = np.exp(-chi2 / 2)
            while simulated < N:
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = 2 * np.log(echi * (1 - v) + v) / chi2
                # as in case one, simplify u <= h(G_inv(v)) and then transform
                # z to the target distribution X = G_inv(v)
                accept = (u**2 + z <= 0)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    rvs = np.sqrt(1 + z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        else:
            # conditional Gamma for chi > 1.8
            while simulated < N:
                k = N - simulated
                g = random_state.standard_gamma(1.5, size=k)
                accept = (g <= chi2 / 2)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = g[accept]
                    simulated += num_accept
            # map the accepted gamma variates Y back to X
            x = np.sqrt(1 - 2 * x / chi2)

        return np.reshape(x, size1d)

    def _stats(self, chi):
        # Returns (mean, variance, None, None).
        # need to ensure that dtype is float
        # otherwise the mask below does not work for integers
        chi = np.asarray(chi, dtype=float)
        phi = _argus_phi(chi)
        m = np.sqrt(np.pi/8) * chi * sc.ive(1, chi**2/4) / phi
        # compute second moment, use Taylor expansion for small chi (<= 0.1)
        mu2 = np.empty_like(chi)
        mask = chi > 0.1
        c = chi[mask]
        mu2[mask] = 1 - 3 / c**2 + c * _norm_pdf(c) / phi[mask]
        c = chi[~mask]
        # polynomial coefficients, highest power first (np.polyval order)
        coef = [-358/65690625, 0, -94/1010625, 0, 2/2625, 0, 6/175, 0, 0.4]
        mu2[~mask] = np.polyval(coef, c)
        return m, mu2 - m**2, None, None


# Public distribution instance; constructed with bounds a=0 and b=1.
argus = argus_gen(name='argus', longname="An Argus Function", a=0.0, b=1.0)
  8952. class rv_histogram(rv_continuous):
  8953. """
  8954. Generates a distribution given by a histogram.
  8955. This is useful to generate a template distribution from a binned
  8956. datasample.
  8957. As a subclass of the `rv_continuous` class, `rv_histogram` inherits from it
  8958. a collection of generic methods (see `rv_continuous` for the full list),
  8959. and implements them based on the properties of the provided binned
  8960. datasample.
  8961. Parameters
  8962. ----------
  8963. histogram : tuple of array_like
  8964. Tuple containing two array_like objects.
  8965. The first containing the content of n bins,
  8966. the second containing the (n+1) bin boundaries.
  8967. In particular, the return value of `numpy.histogram` is accepted.
  8968. density : bool, optional
  8969. If False, assumes the histogram is proportional to counts per bin;
  8970. otherwise, assumes it is proportional to a density.
  8971. For constant bin widths, these are equivalent, but the distinction
  8972. is important when bin widths vary (see Notes).
  8973. If None (default), sets ``density=True`` for backwards compatibility,
  8974. but warns if the bin widths are variable. Set `density` explicitly
  8975. to silence the warning.
  8976. .. versionadded:: 1.10.0
  8977. Notes
  8978. -----
  8979. When a histogram has unequal bin widths, there is a distinction between
  8980. histograms that are proportional to counts per bin and histograms that are
  8981. proportional to probability density over a bin. If `numpy.histogram` is
  8982. called with its default ``density=False``, the resulting histogram is the
  8983. number of counts per bin, so ``density=False`` should be passed to
  8984. `rv_histogram`. If `numpy.histogram` is called with ``density=True``, the
  8985. resulting histogram is in terms of probability density, so ``density=True``
  8986. should be passed to `rv_histogram`. To avoid warnings, always pass
  8987. ``density`` explicitly when the input histogram has unequal bin widths.
  8988. There are no additional shape parameters except for the loc and scale.
  8989. The pdf is defined as a stepwise function from the provided histogram.
  8990. The cdf is a linear interpolation of the pdf.
  8991. .. versionadded:: 0.19.0
  8992. Examples
  8993. --------
  8994. Create a scipy.stats distribution from a numpy histogram
  8995. >>> import scipy.stats
  8996. >>> import numpy as np
  8997. >>> data = scipy.stats.norm.rvs(size=100000, loc=0, scale=1.5,
  8998. ... random_state=123)
  8999. >>> hist = np.histogram(data, bins=100)
  9000. >>> hist_dist = scipy.stats.rv_histogram(hist, density=False)
  9001. Behaves like an ordinary scipy rv_continuous distribution
  9002. >>> hist_dist.pdf(1.0)
  9003. 0.20538577847618705
  9004. >>> hist_dist.cdf(2.0)
  9005. 0.90818568543056499
  9006. PDF is zero above (below) the highest (lowest) bin of the histogram,
  9007. defined by the max (min) of the original dataset
  9008. >>> hist_dist.pdf(np.max(data))
  9009. 0.0
  9010. >>> hist_dist.cdf(np.max(data))
  9011. 1.0
  9012. >>> hist_dist.pdf(np.min(data))
  9013. 7.7591907244498314e-05
  9014. >>> hist_dist.cdf(np.min(data))
  9015. 0.0
  9016. PDF and CDF follow the histogram
  9017. >>> import matplotlib.pyplot as plt
  9018. >>> X = np.linspace(-5.0, 5.0, 100)
  9019. >>> fig, ax = plt.subplots()
  9020. >>> ax.set_title("PDF from Template")
  9021. >>> ax.hist(data, density=True, bins=100)
  9022. >>> ax.plot(X, hist_dist.pdf(X), label='PDF')
  9023. >>> ax.plot(X, hist_dist.cdf(X), label='CDF')
  9024. >>> ax.legend()
  9025. >>> fig.show()
  9026. """
  9027. _support_mask = rv_continuous._support_mask
  9028. def __init__(self, histogram, *args, density=None, **kwargs):
  9029. """
  9030. Create a new distribution using the given histogram
  9031. Parameters
  9032. ----------
  9033. histogram : tuple of array_like
  9034. Tuple containing two array_like objects.
  9035. The first containing the content of n bins,
  9036. the second containing the (n+1) bin boundaries.
  9037. In particular, the return value of np.histogram is accepted.
  9038. density : bool, optional
  9039. If False, assumes the histogram is proportional to counts per bin;
  9040. otherwise, assumes it is proportional to a density.
  9041. For constant bin widths, these are equivalent.
  9042. If None (default), sets ``density=True`` for backward
  9043. compatibility, but warns if the bin widths are variable. Set
  9044. `density` explicitly to silence the warning.
  9045. """
  9046. self._histogram = histogram
  9047. self._density = density
  9048. if len(histogram) != 2:
  9049. raise ValueError("Expected length 2 for parameter histogram")
  9050. self._hpdf = np.asarray(histogram[0])
  9051. self._hbins = np.asarray(histogram[1])
  9052. if len(self._hpdf) + 1 != len(self._hbins):
  9053. raise ValueError("Number of elements in histogram content "
  9054. "and histogram boundaries do not match, "
  9055. "expected n and n+1.")
  9056. self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
  9057. bins_vary = not np.allclose(self._hbin_widths, self._hbin_widths[0])
  9058. if density is None and bins_vary:
  9059. message = ("Bin widths are not constant. Assuming `density=True`."
  9060. "Specify `density` explicitly to silence this warning.")
  9061. warnings.warn(message, RuntimeWarning, stacklevel=2)
  9062. density = True
  9063. elif not density:
  9064. self._hpdf = self._hpdf / self._hbin_widths
  9065. self._hpdf = self._hpdf / float(np.sum(self._hpdf * self._hbin_widths))
  9066. self._hcdf = np.cumsum(self._hpdf * self._hbin_widths)
  9067. self._hpdf = np.hstack([0.0, self._hpdf, 0.0])
  9068. self._hcdf = np.hstack([0.0, self._hcdf])
  9069. # Set support
  9070. kwargs['a'] = self.a = self._hbins[0]
  9071. kwargs['b'] = self.b = self._hbins[-1]
  9072. super().__init__(*args, **kwargs)
  9073. def _pdf(self, x):
  9074. """
  9075. PDF of the histogram
  9076. """
  9077. return self._hpdf[np.searchsorted(self._hbins, x, side='right')]
  9078. def _cdf(self, x):
  9079. """
  9080. CDF calculated from the histogram
  9081. """
  9082. return np.interp(x, self._hbins, self._hcdf)
  9083. def _ppf(self, x):
  9084. """
  9085. Percentile function calculated from the histogram
  9086. """
  9087. return np.interp(x, self._hcdf, self._hbins)
  9088. def _munp(self, n):
  9089. """Compute the n-th non-central moment."""
  9090. integrals = (self._hbins[1:]**(n+1) - self._hbins[:-1]**(n+1)) / (n+1)
  9091. return np.sum(self._hpdf[1:-1] * integrals)
  9092. def _entropy(self):
  9093. """Compute entropy of distribution"""
  9094. hpdf = self._hpdf[1:-1]
  9095. res = xpx.apply_where(hpdf > 0.0, hpdf, np.log, fill_value=0.0)
  9096. return -np.sum(hpdf * res * self._hbin_widths)
  9097. def _updated_ctor_param(self):
  9098. """
  9099. Set the histogram as additional constructor argument
  9100. """
  9101. dct = super()._updated_ctor_param()
  9102. dct['histogram'] = self._histogram
  9103. dct['density'] = self._density
  9104. return dct
  9105. class studentized_range_gen(rv_continuous):
  9106. r"""A studentized range continuous random variable.
  9107. %(before_notes)s
  9108. See Also
  9109. --------
  9110. t: Student's t distribution
  9111. Notes
  9112. -----
  9113. The probability density function for `studentized_range` is:
  9114. .. math::
  9115. f(x; k, \nu) = \frac{k(k-1)\nu^{\nu/2}}{\Gamma(\nu/2)
  9116. 2^{\nu/2-1}} \int_{0}^{\infty} \int_{-\infty}^{\infty}
  9117. s^{\nu} e^{-\nu s^2/2} \phi(z) \phi(sx + z)
  9118. [\Phi(sx + z) - \Phi(z)]^{k-2} \,dz \,ds
  9119. for :math:`x ≥ 0`, :math:`k > 1`, and :math:`\nu > 0`.
  9120. `studentized_range` takes ``k`` for :math:`k` and ``df`` for :math:`\nu`
  9121. as shape parameters.
  9122. When :math:`\nu` exceeds 100,000, an asymptotic approximation (infinite
  9123. degrees of freedom) is used to compute the cumulative distribution
  9124. function [4]_ and probability distribution function.
  9125. %(after_notes)s
  9126. References
  9127. ----------
  9128. .. [1] "Studentized range distribution",
  9129. https://en.wikipedia.org/wiki/Studentized_range_distribution
  9130. .. [2] Batista, Ben Dêivide, et al. "Externally Studentized Normal Midrange
  9131. Distribution." Ciência e Agrotecnologia, vol. 41, no. 4, 2017, pp.
  9132. 378-389., doi:10.1590/1413-70542017414047716.
  9133. .. [3] Harter, H. Leon. "Tables of Range and Studentized Range." The Annals
  9134. of Mathematical Statistics, vol. 31, no. 4, 1960, pp. 1122-1147.
  9135. JSTOR, www.jstor.org/stable/2237810. Accessed 18 Feb. 2021.
  9136. .. [4] Lund, R. E., and J. R. Lund. "Algorithm AS 190: Probabilities and
  9137. Upper Quantiles for the Studentized Range." Journal of the Royal
  9138. Statistical Society. Series C (Applied Statistics), vol. 32, no. 2,
  9139. 1983, pp. 204-210. JSTOR, www.jstor.org/stable/2347300. Accessed 18
  9140. Feb. 2021.
  9141. Examples
  9142. --------
  9143. >>> import numpy as np
  9144. >>> from scipy.stats import studentized_range
  9145. >>> import matplotlib.pyplot as plt
  9146. >>> fig, ax = plt.subplots(1, 1)
  9147. Display the probability density function (``pdf``):
  9148. >>> k, df = 3, 10
  9149. >>> x = np.linspace(studentized_range.ppf(0.01, k, df),
  9150. ... studentized_range.ppf(0.99, k, df), 100)
  9151. >>> ax.plot(x, studentized_range.pdf(x, k, df),
  9152. ... 'r-', lw=5, alpha=0.6, label='studentized_range pdf')
  9153. Alternatively, the distribution object can be called (as a function)
  9154. to fix the shape, location and scale parameters. This returns a "frozen"
  9155. RV object holding the given parameters fixed.
  9156. Freeze the distribution and display the frozen ``pdf``:
  9157. >>> rv = studentized_range(k, df)
  9158. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  9159. Check accuracy of ``cdf`` and ``ppf``:
  9160. >>> vals = studentized_range.ppf([0.001, 0.5, 0.999], k, df)
  9161. >>> np.allclose([0.001, 0.5, 0.999], studentized_range.cdf(vals, k, df))
  9162. True
  9163. Rather than using (``studentized_range.rvs``) to generate random variates,
  9164. which is very slow for this distribution, we can approximate the inverse
  9165. CDF using an interpolator, and then perform inverse transform sampling
  9166. with this approximate inverse CDF.
  9167. This distribution has an infinite but thin right tail, so we focus our
  9168. attention on the leftmost 99.9 percent.
  9169. >>> a, b = studentized_range.ppf([0, .999], k, df)
  9170. >>> a, b
  9171. 0, 7.41058083802274
  9172. >>> from scipy.interpolate import interp1d
  9173. >>> rng = np.random.default_rng()
  9174. >>> xs = np.linspace(a, b, 50)
  9175. >>> cdf = studentized_range.cdf(xs, k, df)
  9176. # Create an interpolant of the inverse CDF
  9177. >>> ppf = interp1d(cdf, xs, fill_value='extrapolate')
  9178. # Perform inverse transform sampling using the interpolant
  9179. >>> r = ppf(rng.uniform(size=1000))
  9180. And compare the histogram:
  9181. >>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
  9182. >>> ax.legend(loc='best', frameon=False)
  9183. >>> plt.show()
  9184. """
  9185. def _argcheck(self, k, df):
  9186. return (k > 1) & (df > 0)
  9187. def _shape_info(self):
  9188. ik = _ShapeInfo("k", False, (1, np.inf), (False, False))
  9189. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  9190. return [ik, idf]
  9191. def _fitstart(self, data):
  9192. # Default is k=1, but that is not a valid value of the parameter.
  9193. return super()._fitstart(data, args=(2, 1))
  9194. def _munp(self, K, k, df):
  9195. cython_symbol = '_studentized_range_moment'
  9196. _a, _b = self._get_support()
  9197. # all three of these are used to create a numpy array so they must
  9198. # be the same shape.
  9199. def _single_moment(K, k, df):
  9200. log_const = _stats._studentized_range_pdf_logconst(k, df)
  9201. arg = [K, k, df, log_const]
  9202. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9203. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9204. ranges = [(-np.inf, np.inf), (0, np.inf), (_a, _b)]
  9205. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9206. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9207. ufunc = np.frompyfunc(_single_moment, 3, 1)
  9208. return np.asarray(ufunc(K, k, df), dtype=np.float64)[()]
  9209. def _pdf(self, x, k, df):
  9210. def _single_pdf(q, k, df):
  9211. # The infinite form of the PDF is derived from the infinite
  9212. # CDF.
  9213. if df < 100000:
  9214. cython_symbol = '_studentized_range_pdf'
  9215. log_const = _stats._studentized_range_pdf_logconst(k, df)
  9216. arg = [q, k, df, log_const]
  9217. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9218. ranges = [(-np.inf, np.inf), (0, np.inf)]
  9219. else:
  9220. cython_symbol = '_studentized_range_pdf_asymptotic'
  9221. arg = [q, k]
  9222. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9223. ranges = [(-np.inf, np.inf)]
  9224. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9225. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9226. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9227. ufunc = np.frompyfunc(_single_pdf, 3, 1)
  9228. return np.asarray(ufunc(x, k, df), dtype=np.float64)[()]
  9229. def _cdf(self, x, k, df):
  9230. def _single_cdf(q, k, df):
  9231. # "When the degrees of freedom V are infinite the probability
  9232. # integral takes [on a] simpler form," and a single asymptotic
  9233. # integral is evaluated rather than the standard double integral.
  9234. # (Lund, Lund, page 205)
  9235. if df < 100000:
  9236. cython_symbol = '_studentized_range_cdf'
  9237. log_const = _stats._studentized_range_cdf_logconst(k, df)
  9238. arg = [q, k, df, log_const]
  9239. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9240. ranges = [(-np.inf, np.inf), (0, np.inf)]
  9241. else:
  9242. cython_symbol = '_studentized_range_cdf_asymptotic'
  9243. arg = [q, k]
  9244. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9245. ranges = [(-np.inf, np.inf)]
  9246. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9247. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9248. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9249. ufunc = np.frompyfunc(_single_cdf, 3, 1)
  9250. # clip p-values to ensure they are in [0, 1].
  9251. return np.clip(np.asarray(ufunc(x, k, df), dtype=np.float64)[()], 0, 1)
  # Module-level instance of the distribution, with support bounds
  # a=0 and b=inf passed to the generator.
  studentized_range = studentized_range_gen(
      a=0, b=np.inf, name='studentized_range'
  )
  class rel_breitwigner_gen(rv_continuous):
      r"""A relativistic Breit-Wigner random variable.

      %(before_notes)s

      See Also
      --------
      cauchy: Cauchy distribution, also known as the Breit-Wigner distribution.

      Notes
      -----

      The probability density function for `rel_breitwigner` is

      .. math::

          f(x, \rho) = \frac{k}{(x^2 - \rho^2)^2 + \rho^2}

      where

      .. math::
          k = \frac{2\sqrt{2}\rho^2\sqrt{\rho^2 + 1}}
              {\pi\sqrt{\rho^2 + \rho\sqrt{\rho^2 + 1}}}

      The relativistic Breit-Wigner distribution is used in high energy physics
      to model resonances [1]_. It gives the uncertainty in the invariant mass,
      :math:`M` [2]_, of a resonance with characteristic mass :math:`M_0` and
      decay-width :math:`\Gamma`, where :math:`M`, :math:`M_0` and :math:`\Gamma`
      are expressed in natural units. In SciPy's parametrization, the shape
      parameter :math:`\rho` is equal to :math:`M_0/\Gamma` and takes values in
      :math:`(0, \infty)`.

      Equivalently, the relativistic Breit-Wigner distribution is said to give
      the uncertainty in the center-of-mass energy :math:`E_{\text{cm}}`. In
      natural units, the speed of light :math:`c` is equal to 1 and the invariant
      mass :math:`M` is equal to the rest energy :math:`Mc^2`. In the
      center-of-mass frame, the rest energy is equal to the total energy [3]_.

      %(after_notes)s

      :math:`\rho = M_0/\Gamma` and :math:`\Gamma` is the scale parameter. For
      example, if one seeks to model the :math:`Z^0` boson with :math:`M_0
      \approx 91.1876 \text{ GeV}` and :math:`\Gamma \approx 2.4952\text{ GeV}`
      [4]_ one can set ``rho=91.1876/2.4952`` and ``scale=2.4952``.

      To ensure a physically meaningful result when using the `fit` method, one
      should set ``floc=0`` to fix the location parameter to 0.

      References
      ----------
      .. [1] Relativistic Breit-Wigner distribution, Wikipedia,
             https://en.wikipedia.org/wiki/Relativistic_Breit-Wigner_distribution
      .. [2] Invariant mass, Wikipedia,
             https://en.wikipedia.org/wiki/Invariant_mass
      .. [3] Center-of-momentum frame, Wikipedia,
             https://en.wikipedia.org/wiki/Center-of-momentum_frame
      .. [4] M. Tanabashi et al. (Particle Data Group) Phys. Rev. D 98, 030001 -
             Published 17 August 2018

      %(example)s

      """

      def _argcheck(self, rho):
          # The shape parameter must be strictly positive.
          is_valid = rho > 0
          return is_valid

      def _shape_info(self):
          # Single shape parameter `rho` on the open interval (0, inf).
          rho_info = _ShapeInfo("rho", False, (0, np.inf), (False, False))
          return [rho_info]

      def _pdf(self, x, rho):
          # C = k / rho**2
          t = 1 + 1/rho**2
          C = np.sqrt(2 * t / (1 + np.sqrt(t))) * 2 / np.pi
          # Overflow for large |x| is silenced; the quotient below then
          # evaluates to 0 rather than emitting a warning.
          with np.errstate(over='ignore'):
              z = (x - rho)*(x + rho)/rho
              return C / (z**2 + 1)

      def _cdf(self, x, rho):
          # C = k / (2 * rho**2) / np.sqrt(1 + 1/rho**2)
          C = np.sqrt(2/(1 + np.sqrt(1 + 1/rho**2)))/np.pi
          # The closed form is the imaginary part of a complex product.
          prefactor = np.sqrt(-1 + 1j/rho)
          angle = np.arctan(x/np.sqrt(-rho*(rho + 1j)))
          cdf = C * 2 * np.imag(prefactor * angle)
          # Sometimes above formula produces values greater than 1.
          return np.clip(cdf, None, 1)

      def _munp(self, n, rho):
          # Raw moments: closed forms for n = 0, 1, 2; every other order
          # is reported as infinite.
          if n == 0:
              return 1.
          if n == 1:
              # C = k / (2 * rho)
              t = 1 + 1/rho**2
              C = np.sqrt(2 * t / (1 + np.sqrt(t))) / np.pi * rho
              return C * (np.pi/2 + np.arctan(rho))
          if n == 2:
              # C = pi * k / (4 * rho)
              t = 1 + 1/rho**2
              C = np.sqrt(t / (2 * (1 + np.sqrt(t)))) * rho
              ratio = (1 - rho * 1j) / np.sqrt(-1 - 1j/rho)
              return 2 * C * np.real(ratio)
          return np.inf

      def _stats(self, rho):
          # Returning None from stats makes public stats use _munp.
          # nan values will be omitted from public stats. Skew and
          # kurtosis are actually infinite.
          mean = variance = None
          skewness = kurtosis = np.nan
          return mean, variance, skewness, kurtosis

      @inherit_docstring_from(rv_continuous)
      def fit(self, data, *args, **kwds):
          # Override rv_continuous.fit to better handle case where floc is set.
          # NOTE: deliberately no docstring here, so the decorator supplies
          # the inherited one.
          data, _, floc, fscale = _check_fit_input_parameters(
              self, data, args, kwds
          )

          censored = isinstance(data, CensoredData)
          if censored and data.num_censored() == 0:
              # There are no censored values in data, so replace the
              # CensoredData instance with a regular array.
              data = data._uncensored
              censored = False

          # Without a fixed location, or with genuinely censored data, defer
          # entirely to the generic implementation.
          if floc is None or censored:
              return super().fit(data, *args, **kwds)

          shifted = data - floc
          if fscale is None:
              # The interquartile range approximates the scale parameter gamma.
              # The median approximates rho * gamma.
              p25, p50, p75 = np.quantile(shifted, [0.25, 0.5, 0.75])
              scale_0 = p75 - p25
              rho_0 = p50 / scale_0
              if "scale" not in kwds:
                  kwds["scale"] = scale_0
          else:
              # Scale is fixed: only a starting value for rho is needed.
              rho_0 = np.median(shifted) / fscale

          # Use the data-driven guess only when the caller gave none.
          if not args:
              args = [rho_0]
          return super().fit(data, *args, **kwds)
  # Module-level instance of the distribution; `a=0.0` is passed as the
  # lower support bound.
  rel_breitwigner = rel_breitwigner_gen(name="rel_breitwigner", a=0.0)

  # Collect names of classes and objects in this module.
  # A copy of the globals mapping is taken before listing its items.
  pairs = list(globals().copy().items())
  _distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)

  # Exported names: the collected distribution and generator names, plus
  # 'rv_histogram'.
  __all__ = _distn_names + _distn_gen_names + ['rv_histogram']