test_stats.py 394 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489
  1. """ Test functions for stats module
  2. WRITTEN BY LOUIS LUANGKESORN <lluang@yahoo.com> FOR THE STATS MODULE
  3. BASED ON WILKINSON'S STATISTICS QUIZ
  4. https://www.stanford.edu/~clint/bench/wilk.txt
  5. Additional tests by a host of SciPy developers.
  6. """
  7. import math
  8. import os
  9. import re
  10. import warnings
  11. from collections import namedtuple
  12. from itertools import product
  13. import hypothesis.extra.numpy as npst
  14. import hypothesis
  15. import contextlib
  16. from numpy.testing import (assert_, assert_equal,
  17. assert_almost_equal, assert_array_almost_equal,
  18. assert_array_equal, assert_approx_equal,
  19. assert_allclose, assert_array_less)
  20. import pytest
  21. from pytest import raises as assert_raises
  22. from numpy import array, arange, float32, power
  23. import numpy as np
  24. import scipy.stats as stats
  25. import scipy.stats._mstats_basic as mstats_basic
  26. from scipy.stats._ksstats import kolmogn
  27. from scipy.special._testutils import FuncData
  28. from scipy import optimize, special
  29. from .common_tests import check_named_results
  30. from scipy.stats._axis_nan_policy import (_broadcast_concatenate, SmallSampleWarning,
  31. too_small_nd_omit, too_small_nd_not_omit,
  32. too_small_1d_omit, too_small_1d_not_omit)
  33. from scipy.stats._stats_py import (_chk_asarray, _moment,
  34. LinregressResult, _xp_mean, _xp_var, _SimpleChi2)
  35. from scipy._lib._util import AxisError
  36. from scipy.conftest import skip_xp_invalid_arg
  37. from scipy._lib._array_api import (array_namespace, eager_warns, is_lazy_array,
  38. is_numpy, is_torch, xp_default_dtype, xp_size,
  39. SCIPY_ARRAY_API, make_xp_test_case, xp_ravel,
  40. xp_swapaxes)
  41. from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal
  42. import scipy._lib.array_api_extra as xpx
# NOTE(review): presumably read by the array-API test machinery (not visible
# in this part of the file) to know which modules to instrument -- confirm.
lazy_xp_modules = [stats]
# Short aliases for the pytest marks that are applied as decorators to
# backend-specific tests below.
skip_xp_backends = pytest.mark.skip_xp_backends
xfail_xp_backends = pytest.mark.xfail_xp_backends
""" Numbers in docstrings beginning with 'W' refer to the section numbers
    and headings found in the STATISTICS QUIZ of Leland Wilkinson.  These are
    considered to be essential functionality.  True testing and
    evaluation of a statistics package requires use of the
    NIST Statistical test data.  See McCullough (1999), "Assessing the
    Reliability of Statistical Software", for a test methodology and its
    implementation in testing SAS, SPSS, and S-Plus.
"""
  54. # Datasets
  55. # These data sets are from the nasty.dat sets used by Wilkinson
  56. # For completeness, I should write the relevant tests and count them as failures
  57. # Somewhat acceptable, since this is still beta software. It would count as a
  58. # good target for 1.0 status
  59. X = array([1,2,3,4,5,6,7,8,9], float)
  60. ZERO = array([0,0,0,0,0,0,0,0,0], float)
  61. BIG = array([99999991,99999992,99999993,99999994,99999995,99999996,99999997,
  62. 99999998,99999999], float)
  63. LITTLE = array([0.99999991,0.99999992,0.99999993,0.99999994,0.99999995,0.99999996,
  64. 0.99999997,0.99999998,0.99999999], float)
  65. HUGE = array([1e+12,2e+12,3e+12,4e+12,5e+12,6e+12,7e+12,8e+12,9e+12], float)
  66. TINY = array([1e-12,2e-12,3e-12,4e-12,5e-12,6e-12,7e-12,8e-12,9e-12], float)
  67. ROUND = array([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5], float)
  68. class TestTrimmedStats:
  69. # TODO: write these tests to handle missing values properly
  70. dprec = np.finfo(np.float64).precision
  71. @make_xp_test_case(stats.tmean)
  72. def test_tmean(self, xp):
  73. default_dtype = xp_default_dtype(xp)
  74. x = xp.asarray(X, dtype=default_dtype)
  75. y = stats.tmean(x, (2, 8), (True, True))
  76. xp_assert_close(y, xp.asarray(5.0))
  77. y1 = stats.tmean(x, limits=(2, 8), inclusive=(False, False))
  78. y2 = stats.tmean(x, limits=None)
  79. xp_assert_close(y1, y2)
  80. x_2d = xp.reshape(xp.arange(63.), (9, 7))
  81. y = stats.tmean(x_2d, axis=None)
  82. xp_assert_close(y, xp.mean(x_2d))
  83. y = stats.tmean(x_2d, axis=0)
  84. xp_assert_close(y, xp.mean(x_2d, axis=0))
  85. y = stats.tmean(x_2d, axis=1)
  86. xp_assert_close(y, xp.mean(x_2d, axis=1))
  87. y = stats.tmean(x_2d, limits=(2, 61), axis=None)
  88. xp_assert_close(y, xp.asarray(31.5))
  89. y = stats.tmean(x_2d, limits=(2, 21), axis=0)
  90. y_true = [14, 11.5, 9, 10, 11, 12, 13]
  91. xp_assert_close(y, xp.asarray(y_true))
  92. y = stats.tmean(x_2d, limits=(2, 21), inclusive=(True, False), axis=0)
  93. y_true = [10.5, 11.5, 9, 10, 11, 12, 13]
  94. xp_assert_close(y, xp.asarray(y_true))
  95. x_2d_with_nan = xpx.at(x_2d)[-1, -3:].set(xp.nan, copy=True)
  96. y = stats.tmean(x_2d_with_nan, limits=(1, 13), axis=0)
  97. y_true = [7, 4.5, 5.5, 6.5, xp.nan, xp.nan, xp.nan]
  98. xp_assert_close(y, xp.asarray(y_true))
  99. y = stats.tmean(x_2d, limits=(2, 21), axis=1)
  100. y_true = [4, 10, 17, 21, xp.nan, xp.nan, xp.nan, xp.nan, xp.nan]
  101. xp_assert_close(y, xp.asarray(y_true))
  102. y = stats.tmean(x_2d, limits=(2, 21),
  103. inclusive=(False, True), axis=1)
  104. y_true = [4.5, 10, 17, 21, xp.nan, xp.nan, xp.nan, xp.nan, xp.nan]
  105. xp_assert_close(y, xp.asarray(y_true))
  106. @make_xp_test_case(stats.tvar)
  107. @pytest.mark.filterwarnings(
  108. "ignore:invalid value encountered in divide:RuntimeWarning:dask"
  109. )
  110. def test_tvar(self, xp):
  111. x = xp.asarray(X.tolist()) # use default dtype of xp
  112. y = stats.tvar(x, limits=(2, 8), inclusive=(True, True))
  113. xp_assert_close(y, xp.asarray(4.6666666666666661))
  114. y = stats.tvar(x, limits=None)
  115. xp_assert_close(y, xp.var(x, correction=1))
  116. x_2d = xp.reshape(xp.arange(63.), (9, 7))
  117. y = stats.tvar(x_2d, axis=None)
  118. xp_assert_close(y, xp.var(x_2d, correction=1))
  119. y = stats.tvar(x_2d, axis=0)
  120. xp_assert_close(y, xp.full((7,), 367.5))
  121. y = stats.tvar(x_2d, axis=1)
  122. xp_assert_close(y, xp.full((9,), 4.66666667))
  123. # Limiting some values along one axis
  124. y = stats.tvar(x_2d, limits=(1, 5), axis=1, inclusive=(True, True))
  125. xp_assert_close(y[0], xp.asarray(2.5))
  126. # Limiting all values along one axis
  127. y = stats.tvar(x_2d, limits=(0, 6), axis=1, inclusive=(True, True))
  128. xp_assert_close(y[0], xp.asarray(4.666666666666667))
  129. xp_assert_equal(y[1], xp.asarray(xp.nan))
  130. @make_xp_test_case(stats.tstd)
  131. def test_tstd(self, xp):
  132. x = xp.asarray(X.tolist()) # use default dtype of xp
  133. y = stats.tstd(x, (2, 8), (True, True))
  134. xp_assert_close(y, xp.asarray(2.1602468994692865))
  135. y = stats.tstd(x, limits=None)
  136. xp_assert_close(y, xp.std(x, correction=1))
  137. @make_xp_test_case(stats.tmin)
  138. def test_tmin(self, xp):
  139. x = xp.arange(10.)
  140. xp_assert_equal(stats.tmin(x), xp.asarray(0.))
  141. xp_assert_equal(stats.tmin(x, lowerlimit=0), xp.asarray(0.))
  142. xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), xp.asarray(1.))
  143. x = xp.reshape(x, (5, 2))
  144. xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False),
  145. xp.asarray([2., 1.]))
  146. xp_assert_equal(stats.tmin(x, axis=1), xp.asarray([0., 2., 4., 6., 8.]))
  147. xp_assert_equal(stats.tmin(x, axis=None), xp.asarray(0.))
  148. x = xpx.at(xp.arange(10.), 9).set(xp.nan)
  149. xp_assert_equal(stats.tmin(x), xp.asarray(xp.nan))
  150. # check that if a full slice is masked, the output returns a
  151. # nan instead of a garbage value.
  152. x = xp.reshape(xp.arange(16), (4, 4))
  153. res = stats.tmin(x, lowerlimit=4, axis=1)
  154. xp_assert_equal(res, xp.asarray([np.nan, 4, 8, 12]))
  155. @skip_xp_backends(np_only=True,
  156. reason="Only NumPy arrays support scalar input/`nan_policy`.")
  157. def test_tmin_scalar_and_nanpolicy(self, xp):
  158. assert_equal(stats.tmin(4), 4)
  159. x = np.arange(10.)
  160. x[9] = np.nan
  161. with warnings.catch_warnings():
  162. warnings.filterwarnings("ignore", "invalid value", RuntimeWarning)
  163. assert_equal(stats.tmin(x, nan_policy='omit'), 0.)
  164. msg = "The input contains nan values"
  165. with assert_raises(ValueError, match=msg):
  166. stats.tmin(x, nan_policy='raise')
  167. msg = "nan_policy must be one of..."
  168. with assert_raises(ValueError, match=msg):
  169. stats.tmin(x, nan_policy='foobar')
  170. @make_xp_test_case(stats.tmax)
  171. def test_tmax(self, xp):
  172. x = xp.arange(10.)
  173. xp_assert_equal(stats.tmax(x), xp.asarray(9.))
  174. xp_assert_equal(stats.tmax(x, upperlimit=9), xp.asarray(9.))
  175. xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), xp.asarray(8.))
  176. x = xp.reshape(x, (5, 2))
  177. xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False),
  178. xp.asarray([8., 7.]))
  179. xp_assert_equal(stats.tmax(x, axis=1), xp.asarray([1., 3., 5., 7., 9.]))
  180. xp_assert_equal(stats.tmax(x, axis=None), xp.asarray(9.))
  181. x = xpx.at(xp.arange(10.), 9).set(xp.nan)
  182. xp_assert_equal(stats.tmax(x), xp.asarray(xp.nan))
  183. # check that if a full slice is masked, the output returns a
  184. # nan instead of a garbage value.
  185. with warnings.catch_warnings():
  186. warnings.filterwarnings(
  187. "ignore", "All-NaN slice encountered", RuntimeWarning)
  188. x = xp.reshape(xp.arange(16), (4, 4))
  189. res = stats.tmax(x, upperlimit=11, axis=1)
  190. xp_assert_equal(res, xp.asarray([3, 7, 11, np.nan]))
  191. @skip_xp_backends(np_only=True,
  192. reason="Only NumPy arrays support scalar input/`nan_policy`.")
  193. def test_tmax_scalar_and_nanpolicy(self, xp):
  194. assert_equal(stats.tmax(4), 4)
  195. x = np.arange(10.)
  196. x[6] = np.nan
  197. with warnings.catch_warnings():
  198. warnings.filterwarnings("ignore", "invalid value", RuntimeWarning)
  199. assert_equal(stats.tmax(x, nan_policy='omit'), 9.)
  200. msg = "The input contains nan values"
  201. with assert_raises(ValueError, match=msg):
  202. stats.tmax(x, nan_policy='raise')
  203. msg = "nan_policy must be one of..."
  204. with assert_raises(ValueError, match=msg):
  205. stats.tmax(x, nan_policy='foobar')
  206. @make_xp_test_case(stats.tmin, stats.tmax)
  207. def test_tmin_tmax_int_dtype(self, xp):
  208. x = xp.reshape(xp.arange(10, dtype=xp.int16), (2, 5)).T
  209. # When tmin/tmax don't need to inject any NaNs,
  210. # retain the input dtype. Dask/JAX can't inspect
  211. # the data so they always return float.
  212. expect_dtype = xp_default_dtype(xp) if is_lazy_array(x) else x.dtype
  213. xp_assert_equal(stats.tmin(x), xp.asarray([0, 5], dtype=expect_dtype))
  214. xp_assert_equal(stats.tmax(x), xp.asarray([4, 9], dtype=expect_dtype))
  215. # When they do inject NaNs, all backends behave the same.
  216. xp_assert_equal(stats.tmin(x, lowerlimit=6), xp.asarray([xp.nan, 6.]))
  217. xp_assert_equal(stats.tmax(x, upperlimit=3), xp.asarray([3., xp.nan]))
  218. @skip_xp_backends(eager_only=True, reason="Only with data-dependent output dtype")
  219. @make_xp_test_case(stats.tmin, stats.tmax)
  220. def test_gh_22626(self, xp):
  221. # Test that `tmin`/`tmax` returns exact result with outrageously large integers
  222. x = xp.arange(2**62, 2**62+10)
  223. xp_assert_equal(stats.tmin(x[None, :]), x)
  224. xp_assert_equal(stats.tmax(x[None, :]), x)
  225. @make_xp_test_case(stats.tsem)
  226. def test_tsem(self, xp):
  227. x = xp.asarray(X.tolist()) # use default dtype of xp
  228. y = stats.tsem(x, limits=(3, 8), inclusive=(False, True))
  229. y_ref = xp.asarray([4., 5., 6., 7., 8.])
  230. xp_assert_close(y, xp.std(y_ref, correction=1) / xp_size(y_ref)**0.5)
  231. xp_assert_close(stats.tsem(x, limits=[-1, 10]), stats.tsem(x, limits=None))
  232. class TestPearsonrWilkinson:
  233. """ W.II.D. Compute a correlation matrix on all the variables.
  234. All the correlations, except for ZERO and MISS, should be exactly 1.
  235. ZERO and MISS should have undefined or missing correlations with the
  236. other variables. The same should go for SPEARMAN correlations, if
  237. your program has them.
  238. """
  239. def test_pXX(self):
  240. y = stats.pearsonr(X,X)
  241. r = y[0]
  242. assert_approx_equal(r,1.0)
  243. def test_pXBIG(self):
  244. y = stats.pearsonr(X,BIG)
  245. r = y[0]
  246. assert_approx_equal(r,1.0)
  247. def test_pXLITTLE(self):
  248. y = stats.pearsonr(X,LITTLE)
  249. r = y[0]
  250. assert_approx_equal(r,1.0)
  251. def test_pXHUGE(self):
  252. y = stats.pearsonr(X,HUGE)
  253. r = y[0]
  254. assert_approx_equal(r,1.0)
  255. def test_pXTINY(self):
  256. y = stats.pearsonr(X,TINY)
  257. r = y[0]
  258. assert_approx_equal(r,1.0)
  259. def test_pXROUND(self):
  260. y = stats.pearsonr(X,ROUND)
  261. r = y[0]
  262. assert_approx_equal(r,1.0)
  263. def test_pBIGBIG(self):
  264. y = stats.pearsonr(BIG,BIG)
  265. r = y[0]
  266. assert_approx_equal(r,1.0)
  267. def test_pBIGLITTLE(self):
  268. y = stats.pearsonr(BIG,LITTLE)
  269. r = y[0]
  270. assert_approx_equal(r,1.0)
  271. def test_pBIGHUGE(self):
  272. y = stats.pearsonr(BIG,HUGE)
  273. r = y[0]
  274. assert_approx_equal(r,1.0)
  275. def test_pBIGTINY(self):
  276. y = stats.pearsonr(BIG,TINY)
  277. r = y[0]
  278. assert_approx_equal(r,1.0)
  279. def test_pBIGROUND(self):
  280. y = stats.pearsonr(BIG,ROUND)
  281. r = y[0]
  282. assert_approx_equal(r,1.0)
  283. def test_pLITTLELITTLE(self):
  284. y = stats.pearsonr(LITTLE,LITTLE)
  285. r = y[0]
  286. assert_approx_equal(r,1.0)
  287. def test_pLITTLEHUGE(self):
  288. y = stats.pearsonr(LITTLE,HUGE)
  289. r = y[0]
  290. assert_approx_equal(r,1.0)
  291. def test_pLITTLETINY(self):
  292. y = stats.pearsonr(LITTLE,TINY)
  293. r = y[0]
  294. assert_approx_equal(r,1.0)
  295. def test_pLITTLEROUND(self):
  296. y = stats.pearsonr(LITTLE,ROUND)
  297. r = y[0]
  298. assert_approx_equal(r,1.0)
  299. def test_pHUGEHUGE(self):
  300. y = stats.pearsonr(HUGE,HUGE)
  301. r = y[0]
  302. assert_approx_equal(r,1.0)
  303. def test_pHUGETINY(self):
  304. y = stats.pearsonr(HUGE,TINY)
  305. r = y[0]
  306. assert_approx_equal(r,1.0)
  307. def test_pHUGEROUND(self):
  308. y = stats.pearsonr(HUGE,ROUND)
  309. r = y[0]
  310. assert_approx_equal(r,1.0)
  311. def test_pTINYTINY(self):
  312. y = stats.pearsonr(TINY,TINY)
  313. r = y[0]
  314. assert_approx_equal(r,1.0)
  315. def test_pTINYROUND(self):
  316. y = stats.pearsonr(TINY,ROUND)
  317. r = y[0]
  318. assert_approx_equal(r,1.0)
  319. def test_pROUNDROUND(self):
  320. y = stats.pearsonr(ROUND,ROUND)
  321. r = y[0]
  322. assert_approx_equal(r,1.0)
  323. @make_xp_test_case(stats.pearsonr)
  324. class TestPearsonr:
  325. def test_pearsonr_result_attributes(self):
  326. res = stats.pearsonr(X, X)
  327. attributes = ('correlation', 'pvalue')
  328. check_named_results(res, attributes)
  329. assert_equal(res.correlation, res.statistic)
  330. def test_r_almost_exactly_pos1(self, xp):
  331. a = xp.arange(3.0)
  332. r, prob = stats.pearsonr(a, a)
  333. xp_assert_close(r, xp.asarray(1.0), atol=1e-15)
  334. # With n = len(a) = 3, the error in prob grows like the
  335. # square root of the error in r.
  336. xp_assert_close(prob, xp.asarray(0.0), atol=np.sqrt(2*np.spacing(1.0)))
  337. def test_r_almost_exactly_neg1(self, xp):
  338. a = xp.arange(3.0)
  339. r, prob = stats.pearsonr(a, -a)
  340. xp_assert_close(r, xp.asarray(-1.0), atol=1e-15)
  341. # With n = len(a) = 3, the error in prob grows like the
  342. # square root of the error in r.
  343. xp_assert_close(prob, xp.asarray(0.0), atol=np.sqrt(2*np.spacing(1.0)))
  344. def test_basic(self, xp):
  345. # A basic test, with a correlation coefficient
  346. # that is not 1 or -1.
  347. a = xp.asarray([-1, 0, 1])
  348. b = xp.asarray([0, 0, 3])
  349. r, prob = stats.pearsonr(a, b)
  350. xp_assert_close(r, xp.asarray(3**0.5/2))
  351. xp_assert_close(prob, xp.asarray(1/3))
  352. def test_constant_input(self, xp):
  353. # Zero variance input
  354. # See https://github.com/scipy/scipy/issues/3728
  355. x = xp.asarray([0.667, 0.667, 0.667])
  356. y = xp.asarray([0.123, 0.456, 0.789])
  357. msg = "An input array is constant"
  358. with eager_warns(stats.ConstantInputWarning, match=msg, xp=xp):
  359. r, p = stats.pearsonr(x, y)
  360. xp_assert_close(r, xp.asarray(xp.nan))
  361. xp_assert_close(p, xp.asarray(xp.nan))
  362. @pytest.mark.parametrize('dtype', ['float32', 'float64'])
  363. def test_near_constant_input(self, xp, dtype):
  364. npdtype = getattr(np, dtype)
  365. dtype = getattr(xp, dtype)
  366. # Near constant input (but not constant):
  367. x = xp.asarray([2, 2, 2 + np.spacing(2, dtype=npdtype)], dtype=dtype)
  368. y = xp.asarray([3, 3, 3 + 6*np.spacing(3, dtype=npdtype)], dtype=dtype)
  369. msg = "An input array is nearly constant; the computed"
  370. with eager_warns(stats.NearConstantInputWarning, match=msg, xp=xp):
  371. # r and p are garbage, so don't bother checking them in this case.
  372. # (The exact value of r would be 1.)
  373. stats.pearsonr(x, y)
  374. def test_very_small_input_values(self, xp):
  375. # Very small values in an input. A naive implementation will
  376. # suffer from underflow.
  377. # See https://github.com/scipy/scipy/issues/9353
  378. x = xp.asarray([0.004434375, 0.004756007, 0.003911996, 0.0038005, 0.003409971],
  379. dtype=xp.float64)
  380. y = xp.asarray([2.48e-188, 7.41e-181, 4.09e-208, 2.08e-223, 2.66e-245],
  381. dtype=xp.float64)
  382. r, p = stats.pearsonr(x, y)
  383. # The expected values were computed using mpmath with 80 digits
  384. # of precision.
  385. xp_assert_close(r, xp.asarray(0.7272930540750450, dtype=xp.float64))
  386. xp_assert_close(p, xp.asarray(0.1637805429533202, dtype=xp.float64))
  387. def test_very_large_input_values(self, xp):
  388. # Very large values in an input. A naive implementation will
  389. # suffer from overflow.
  390. # See https://github.com/scipy/scipy/issues/8980
  391. x = 1e90*xp.asarray([0, 0, 0, 1, 1, 1, 1], dtype=xp.float64)
  392. y = 1e90*xp.arange(7, dtype=xp.float64)
  393. r, p = stats.pearsonr(x, y)
  394. # The expected values were computed using mpmath with 80 digits
  395. # of precision.
  396. xp_assert_close(r, xp.asarray(0.8660254037844386, dtype=xp.float64))
  397. xp_assert_close(p, xp.asarray(0.011724811003954638, dtype=xp.float64))
  398. def test_extremely_large_input_values(self, xp):
  399. # Extremely large values in x and y. These values would cause the
  400. # product sigma_x * sigma_y to overflow if the two factors were
  401. # computed independently.
  402. x = xp.asarray([2.3e200, 4.5e200, 6.7e200, 8e200], dtype=xp.float64)
  403. y = xp.asarray([1.2e199, 5.5e200, 3.3e201, 1.0e200], dtype=xp.float64)
  404. r, p = stats.pearsonr(x, y)
  405. # The expected values were computed using mpmath with 80 digits
  406. # of precision.
  407. xp_assert_close(r, xp.asarray(0.351312332103289, dtype=xp.float64))
  408. xp_assert_close(p, xp.asarray(0.648687667896711, dtype=xp.float64))
  409. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  410. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  411. def test_length_two_pos1(self, xp):
  412. # Inputs with length 2.
  413. # See https://github.com/scipy/scipy/issues/7730
  414. x = xp.asarray([1., 2.])
  415. y = xp.asarray([3., 5.])
  416. res = stats.pearsonr(x, y)
  417. r, p = res
  418. one = xp.asarray(1.)
  419. xp_assert_equal(r, one)
  420. xp_assert_equal(p, one)
  421. low, high = res.confidence_interval()
  422. xp_assert_equal(low, -one)
  423. xp_assert_equal(high, one)
  424. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  425. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  426. def test_length_two_neg1(self, xp):
  427. # Inputs with length 2.
  428. # See https://github.com/scipy/scipy/issues/7730
  429. x = xp.asarray([2., 1.])
  430. y = xp.asarray([3., 5.])
  431. res = stats.pearsonr(x, y)
  432. r, p = res
  433. one = xp.asarray(1.)
  434. xp_assert_equal(r, -one)
  435. xp_assert_equal(p, one)
  436. low, high = res.confidence_interval()
  437. xp_assert_equal(low, -one)
  438. xp_assert_equal(high, one)
  439. @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
  440. def test_length_two_constant_input(self, xp):
  441. # Zero variance input
  442. # See https://github.com/scipy/scipy/issues/3728
  443. # and https://github.com/scipy/scipy/issues/7730
  444. x = xp.asarray([0.667, 0.667])
  445. y = xp.asarray([0.123, 0.456])
  446. msg = "An input array is constant"
  447. with eager_warns(stats.ConstantInputWarning, match=msg, xp=xp):
  448. r, p = stats.pearsonr(x, y)
  449. xp_assert_close(r, xp.asarray(xp.nan))
  450. xp_assert_close(p, xp.asarray(xp.nan))
  451. # Expected values computed with R 3.6.2 cor.test, e.g.
  452. # options(digits=16)
  453. # x <- c(1, 2, 3, 4)
  454. # y <- c(0, 1, 0.5, 1)
  455. # cor.test(x, y, method = "pearson", alternative = "g")
  456. # correlation coefficient and p-value for alternative='two-sided'
  457. # calculated with mpmath agree to 16 digits.
  458. @skip_xp_backends(np_only=True)
  459. @pytest.mark.parametrize('alternative, pval, rlow, rhigh, sign',
  460. [('two-sided', 0.325800137536, -0.814938968841, 0.99230697523, 1),
  461. ('less', 0.8370999312316, -1, 0.985600937290653, 1),
  462. ('greater', 0.1629000687684, -0.6785654158217636, 1, 1),
  463. ('two-sided', 0.325800137536, -0.992306975236, 0.81493896884, -1),
  464. ('less', 0.1629000687684, -1.0, 0.6785654158217636, -1),
  465. ('greater', 0.8370999312316, -0.985600937290653, 1.0, -1)])
  466. def test_basic_example(self, alternative, pval, rlow, rhigh, sign, xp):
  467. x = [1, 2, 3, 4]
  468. y = np.array([0, 1, 0.5, 1]) * sign
  469. result = stats.pearsonr(x, y, alternative=alternative)
  470. assert_allclose(result.statistic, 0.6741998624632421*sign, rtol=1e-12)
  471. assert_allclose(result.pvalue, pval, rtol=1e-6)
  472. ci = result.confidence_interval()
  473. assert_allclose(ci, (rlow, rhigh), rtol=1e-6)
  474. def test_negative_correlation_pvalue_gh17795(self, xp):
  475. x = xp.arange(10.)
  476. y = -x
  477. test_greater = stats.pearsonr(x, y, alternative='greater')
  478. test_less = stats.pearsonr(x, y, alternative='less')
  479. xp_assert_close(test_greater.pvalue, xp.asarray(1.))
  480. xp_assert_close(test_less.pvalue, xp.asarray(0.), atol=1e-20)
  481. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  482. def test_length3_r_exactly_negative_one(self, xp):
  483. x = xp.asarray([1., 2., 3.])
  484. y = xp.asarray([5., -4., -13.])
  485. res = stats.pearsonr(x, y)
  486. # The expected r and p are exact.
  487. r, p = res
  488. one = xp.asarray(1.0)
  489. xp_assert_close(r, -one)
  490. xp_assert_close(p, 0*one, atol=1e-7)
  491. low, high = res.confidence_interval()
  492. xp_assert_equal(low, -one)
  493. xp_assert_equal(high, one)
  494. def test_input_validation(self):
  495. # Arraylike is np only
  496. x = [1, 2, 3]
  497. y = [4]
  498. message = '`x` and `y` must have the same length along `axis`.'
  499. with pytest.raises(ValueError, match=message):
  500. stats.pearsonr(x, y)
  501. x = [1, 2, 3]
  502. y = [4, 5]
  503. message = '`x` and `y` must be broadcastable.'
  504. with pytest.raises(ValueError, match=message):
  505. stats.pearsonr(x, y)
  506. x = [1]
  507. y = [2]
  508. message = '`x` and `y` must have length at least 2.'
  509. with pytest.raises(ValueError, match=message):
  510. stats.pearsonr(x, y)
  511. x = [-1j, -2j, -3.0j]
  512. y = [-1j, -2j, -3.0j]
  513. message = 'This function does not support complex data'
  514. with pytest.raises(ValueError, match=message):
  515. stats.pearsonr(x, y)
  516. message = "`method` must be an instance of..."
  517. with pytest.raises(ValueError, match=message):
  518. stats.pearsonr([1, 2], [3, 4], method="asymptotic")
  519. res = stats.pearsonr([1, 2], [3, 4])
  520. with pytest.raises(ValueError, match=message):
  521. res.confidence_interval(method="exact")
  522. @pytest.mark.fail_slow(10)
  523. @pytest.mark.xfail_on_32bit("Monte Carlo method needs > a few kB of memory")
  524. @pytest.mark.parametrize('alternative', ('less', 'greater', 'two-sided'))
  525. @pytest.mark.parametrize('method_name',
  526. ('permutation', 'monte_carlo', 'monte_carlo2'))
  527. def test_resampling_pvalue(self, method_name, alternative):
  528. rng = np.random.default_rng(24623935790378923)
  529. size = (2, 100) if method_name == 'permutation' else (2, 1000)
  530. x = rng.normal(size=size)
  531. y = rng.normal(size=size)
  532. methods = {'permutation': stats.PermutationMethod(rng=rng),
  533. 'monte_carlo': stats.MonteCarloMethod(rvs=(rng.normal,)*2),
  534. 'monte_carlo2': stats.MonteCarloMethod(rng=1294)}
  535. method = methods[method_name]
  536. res = stats.pearsonr(x, y, alternative=alternative, method=method, axis=-1)
  537. ref = stats.pearsonr(x, y, alternative=alternative, axis=-1)
  538. assert_allclose(res.statistic, ref.statistic, rtol=1e-15)
  539. assert_allclose(res.pvalue, ref.pvalue, rtol=1e-2, atol=1e-3)
  540. if method_name == 'monte_carlo2':
  541. method = stats.MonteCarloMethod(rng=1294)
  542. res2 = stats.pearsonr(x, y, alternative=alternative, method=method, axis=-1)
  543. assert_equal(res2.statistic, res.statistic)
  544. assert_equal(res2.pvalue, res.pvalue)
  545. @pytest.mark.slow
  546. @pytest.mark.parametrize('alternative', ('less', 'greater', 'two-sided'))
  547. def test_bootstrap_ci(self, alternative):
  548. rng = np.random.default_rng(2462935790378923)
  549. x = rng.normal(size=(2, 100))
  550. y = rng.normal(size=(2, 100))
  551. res = stats.pearsonr(x, y, alternative=alternative, axis=-1)
  552. # preserve use of old random_state during SPEC 7 transition
  553. rng = np.random.default_rng(724358723498249852)
  554. method = stats.BootstrapMethod(random_state=rng)
  555. res_ci = res.confidence_interval(method=method)
  556. ref_ci = res.confidence_interval()
  557. assert_allclose(res_ci, ref_ci, atol=1.5e-2)
  558. # `rng` is the new argument name`
  559. rng = np.random.default_rng(724358723498249852)
  560. method = stats.BootstrapMethod(rng=rng)
  561. res_ci2 = res.confidence_interval(method=method)
  562. assert_allclose(res_ci2, res_ci)
  563. @pytest.mark.parametrize('axis', [0, 1])
  564. def test_axis01(self, axis):
  565. rng = np.random.default_rng(38572345825)
  566. shape = (9, 10)
  567. x, y = rng.normal(size=(2,) + shape)
  568. res = stats.pearsonr(x, y, axis=axis)
  569. ci = res.confidence_interval()
  570. if axis == 0:
  571. x, y = x.T, y.T
  572. for i in range(x.shape[0]):
  573. res_i = stats.pearsonr(x[i], y[i])
  574. ci_i = res_i.confidence_interval()
  575. assert_allclose(res.statistic[i], res_i.statistic)
  576. assert_allclose(res.pvalue[i], res_i.pvalue)
  577. assert_allclose(ci.low[i], ci_i.low)
  578. assert_allclose(ci.high[i], ci_i.high)
  579. def test_axis_None(self):
  580. rng = np.random.default_rng(38572345825)
  581. shape = (9, 10)
  582. x, y = rng.normal(size=(2,) + shape)
  583. res = stats.pearsonr(x, y, axis=None)
  584. ci = res.confidence_interval()
  585. ref = stats.pearsonr(x.ravel(), y.ravel())
  586. ci_ref = ref.confidence_interval()
  587. assert_allclose(res.statistic, ref.statistic)
  588. assert_allclose(res.pvalue, ref.pvalue)
  589. assert_allclose(ci, ci_ref)
  def test_nd_input_validation(self, xp):
      # Each entry: (expected error message, (x, y), axis)
      wide = xp.ones((2, 5))
      invalid_calls = (
          ('`axis` must be an integer.',
           (wide, wide), 1.5),
          ('`x` and `y` must have the same length along `axis`',
           (wide, xp.ones((2, 1))), 1),
          ('`x` and `y` must have length at least 2.',
           (xp.ones((2, 1)), xp.ones((2, 1))), 1),
          ('`x` and `y` must be broadcastable.',
           (wide, xp.ones((3, 5))), 1),
      )
      for message, (x, y), axis in invalid_calls:
          with pytest.raises(ValueError, match=message):
              stats.pearsonr(x, y, axis=axis)

      # The `method` check below is only exercised for non-NumPy backends.
      if is_numpy(xp):
          return
      message = '`method` must be `None` if arguments are not NumPy arrays.'
      z = xp.arange(10)
      with pytest.raises(ValueError, match=message):
          stats.pearsonr(z, z, method=stats.PermutationMethod())
  @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  def test_nd_special_cases(self, xp):
      # Exercises `pearsonr` along an axis for three special cases: constant
      # rows, nearly constant rows, and samples of length 2.
      # NOTE(review): the block structure below was reconstructed from a
      # listing without indentation -- confirm which statements belong
      # inside the `with` blocks.
      rng = np.random.default_rng(34989235492245)
      x0, y0 = rng.random((4, 5)), rng.random((4, 5))
      # Make row 0 of `x` and row 1 of `y` constant.
      x0[0, ...] = 1
      y0[1, ...] = 2
      x, y = xp.asarray(x0), xp.asarray(y0)
      message = 'An input array is constant'
      with eager_warns(stats.ConstantInputWarning, match=message, xp=xp):
          res = stats.pearsonr(x, y, axis=1)
          ci = res.confidence_interval()
          # The two rows with a constant input give NaN everywhere ...
          nans = xp.asarray([xp.nan, xp.nan], dtype=xp.float64)
          xp_assert_equal(res.statistic[0:2], nans)
          xp_assert_equal(res.pvalue[0:2], nans)
          xp_assert_equal(ci.low[0:2], nans)
          xp_assert_equal(ci.high[0:2], nans)
          # ... while the remaining rows give finite results.
          assert xp.all(xp.isfinite(res.statistic[2:]))
          assert xp.all(xp.isfinite(res.pvalue[2:]))
          assert xp.all(xp.isfinite(ci.low[2:]))
          assert xp.all(xp.isfinite(ci.high[2:]))

      # Perturb one element of each constant row by 1e-15 so that the rows
      # are no longer exactly constant.
      x0[0, 0], y0[1, 1] = 1 + 1e-15, 2 + 1e-15
      x, y = xp.asarray(x0), xp.asarray(y0)
      message = 'An input array is nearly constant'
      with eager_warns(stats.NearConstantInputWarning, match=message, xp=xp):
          stats.pearsonr(x, y, axis=1)

      # length 2 along axis
      x = xp.asarray([[1, 2], [1, 2], [2, 1], [2, 1.]])
      y = xp.asarray([[1, 2], [2, 1], [1, 2], [2, 1.]])
      ones = xp.ones(4)
      res = stats.pearsonr(x, y, axis=-1)
      ci = res.confidence_interval()
      # Expected: statistic of +/-1, p-value of 1, and interval [-1, 1].
      xp_assert_close(res.statistic, xp.asarray([1, -1, -1, 1.]))
      xp_assert_close(res.pvalue, ones)
      xp_assert_close(ci.low, -ones)
      xp_assert_close(ci.high, ones)
  645. def test_different_dimensionality(self, xp):
  646. # For better or for worse, there is one difference between the broadcasting
  647. # behavior of most stats functions and NumPy gufuncs / NEP 5: gufuncs `axis`
  648. # refers to the core dimension *before* prepending `1`s to the array shapes
  649. # to match dimensionality; SciPy's prepends `1`s first. For instance, in
  650. # SciPy, `vecdot` would work just like `xp.sum(x * y, axis=axis)`, but this
  651. # is NOT true of NumPy. The discrepancy only arises when there are multiple
  652. # arguments with different dimensionality and positive indices are used,
  653. # which is probably why it hasn't been a problem. There are pros and cons of
  654. # each convention, and we might want to consider changing our behavior in
  655. # SciPy 2.0. For now, preserve consistency / backward compatibility.
  656. rng = np.random.default_rng(45834598265019344)
  657. x = rng.random((3, 10))
  658. y = rng.random(10)
  659. res = stats.pearsonr(x, y, axis=1)
  660. ref = stats.pearsonr(x, y, axis=-1)
  661. assert_equal(res.statistic, ref.statistic)
  @pytest.mark.parametrize('axis', [0, 1, None])
  @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
  def test_array_api(self, xp, axis, alternative):
      # Results obtained with arrays of backend `xp` must match the results
      # obtained with the same data as NumPy arrays, for the statistic, the
      # p-value and both ends of the confidence interval.

      # Seed a generator locally: `rng` was previously not defined in this
      # test, so the data depended on a name defined outside of it.
      rng = np.random.default_rng(8734592834598)
      x, y = rng.normal(size=(2, 10, 11))

      res = stats.pearsonr(xp.asarray(x), xp.asarray(y),
                           axis=axis, alternative=alternative)
      ref = stats.pearsonr(x, y, axis=axis, alternative=alternative)
      xp_assert_close(res.statistic, xp.asarray(ref.statistic))
      xp_assert_close(res.pvalue, xp.asarray(ref.pvalue))

      res_ci = res.confidence_interval()
      ref_ci = ref.confidence_interval()
      xp_assert_close(res_ci.low, xp.asarray(ref_ci.low))
      xp_assert_close(res_ci.high, xp.asarray(ref_ci.high))
  class TestFisherExact:
      """Tests showing that `fisher_exact` works correctly.

      Note that in SciPy 0.9.0 this was not working well for large numbers due
      to inaccuracy of the hypergeom distribution (see #1218). Fixed now.

      Also note that R and SciPy have different argument formats for their
      hypergeometric distribution functions.

      R:
      > phyper(18999, 99000, 110000, 39000, lower.tail = FALSE)
      [1] 1.701815e-09
      """

      def test_basic(self):
          # (table, expected p-value, number of significant digits compared)
          cases = [
              ([[14500, 20000], [30000, 40000]], 0.01106, 4),
              ([[100, 2], [1000, 5]], 0.1301, 4),
              ([[2, 7], [8, 2]], 0.0230141, 6),
              ([[5, 1], [10, 10]], 0.1973244, 6),
              ([[5, 15], [20, 20]], 0.0958044, 6),
              ([[5, 16], [20, 25]], 0.1725862, 6),
              ([[10, 5], [10, 1]], 0.1973244, 6),
              ([[5, 0], [1, 4]], 0.04761904, 6),
              ([[0, 1], [3, 2]], 1.0, 7),
              ([[0, 2], [6, 4]], 0.4545454545, 7),
          ]
          for table, expected, digits in cases:
              pvalue = stats.fisher_exact(table)[1]
              assert_approx_equal(pvalue, expected, significant=digits)

          # One table for which the odds ratio is checked as well
          odds_ratio, pvalue = stats.fisher_exact([[2, 7], [8, 2]])
          assert_approx_equal(pvalue, 0.0230141, significant=6)
          assert_approx_equal(odds_ratio, 4.0 / 56)

      def test_precise(self):
          # results from R
          #
          # R defines oddsratio differently (see Notes section of fisher_exact
          # docstring), so those will not match.  We leave them in anyway, in
          # case they will be useful later on. We test only the p-value.
          r_results = [
              ([[100, 2], [1000, 5]],
               (2.505583993422285e-001, 1.300759363430016e-001)),
              ([[2, 7], [8, 2]],
               (8.586235135736206e-002, 2.301413756522114e-002)),
              ([[5, 1], [10, 10]],
               (4.725646047336584e+000, 1.973244147157190e-001)),
              ([[5, 15], [20, 20]],
               (3.394396617440852e-001, 9.580440012477637e-002)),
              ([[5, 16], [20, 25]],
               (3.960558326183334e-001, 1.725864953812994e-001)),
              ([[10, 5], [10, 1]],
               (2.116112781158483e-001, 1.973244147157190e-001)),
              ([[10, 5], [10, 0]],
               (0.000000000000000e+000, 6.126482213438734e-002)),
              ([[5, 0], [1, 4]],
               (np.inf, 4.761904761904762e-002)),
              ([[0, 5], [1, 4]],
               (0.000000000000000e+000, 1.000000000000000e+000)),
              ([[5, 1], [0, 4]],
               (np.inf, 4.761904761904758e-002)),
              ([[0, 1], [3, 2]],
               (0.000000000000000e+000, 1.000000000000000e+000)),
          ]
          for table, (_, r_pvalue) in r_results:
              pvalue = stats.fisher_exact(np.asarray(table))[1]
              np.testing.assert_almost_equal(pvalue, r_pvalue, decimal=11,
                                             verbose=True)

      def test_gh4130(self):
          # Previously, a fudge factor used to distinguish between
          # theoretically and numerically different probability masses was
          # 1e-4; it has been tightened to fix gh4130.
          cases = [
              # Accuracy checked against R fisher.test.
              # options(digits=16)
              # table <- matrix(c(6, 108, 37, 200), nrow = 2)
              # fisher.test(table, alternative = "t")
              ([[6, 37], [108, 200]], 0.005092697748126),
              # case from https://github.com/brentp/fishers_exact_test/issues/27
              # That package has an (absolute?) fudge factor of 1e-6; too big
              ([[22, 0], [0, 102]], 7.175066786244549e-25),
              # case from https://github.com/brentp/fishers_exact_test/issues/1
              ([[94, 48], [3577, 16988]], 2.069356340993818e-37),
          ]
          for table, expected in cases:
              assert_allclose(stats.fisher_exact(table)[1], expected)

      def test_gh9231(self):
          # Previously, fisher_exact was extremely slow for this table
          # As reported in gh-9231, the p-value should be very nearly zero
          table = [[5829225, 5692693], [5760959, 5760959]]
          pvalue = stats.fisher_exact(table)[1]
          assert_allclose(pvalue, 0, atol=1e-170)

      @pytest.mark.slow
      def test_large_numbers(self):
          # Test with some large numbers. Regression test for #1401
          # Maps the last table entry to the p-value from R.
          r_pvalues = {75: 5.56e-11, 76: 2.666e-11, 77: 1.363e-11}
          for num, expected in r_pvalues.items():
              pvalue = stats.fisher_exact([[17704, 496], [1065, num]])[1]
              assert_approx_equal(pvalue, expected, significant=4)

          pvalue = stats.fisher_exact([[18000, 80000], [20000, 90000]])[1]
          assert_approx_equal(pvalue, 0.2751, significant=4)

      def test_raises(self):
          # test we raise an error for wrong number of dimensions.
          one_dimensional = np.arange(6)
          message = "The input `table` must have two dimensions."
          with pytest.raises(ValueError, match=message):
              stats.fisher_exact(one_dimensional)

      def test_row_or_col_zero(self):
          # A row or a column of zeros gives p-value 1 and a NaN odds ratio.
          degenerate_tables = (
              [[0, 0], [5, 10]],
              [[5, 10], [0, 0]],
              [[0, 5], [0, 10]],
              [[5, 0], [10, 0]],
          )
          for table in degenerate_tables:
              result = stats.fisher_exact(table)
              assert_equal(result[1], 1.0)
              assert_equal(result[0], np.nan)

      def test_less_greater(self):
          # (table, [p-value for "less", p-value for "greater"])
          cases = (
              # Some tables to compare with R:
              ([[2, 7], [8, 2]],
               [0.018521725952066501, 0.9990149169715733]),
              ([[200, 7], [8, 300]],
               [1.0, 2.0056578803889148e-122]),
              ([[28, 21], [6, 1957]],
               [1.0, 5.7284374608319831e-44]),
              ([[190, 800], [200, 900]],
               [0.7416227, 0.2959826]),
              # Some tables with simple exact values
              # (includes regression test for ticket #1568):
              ([[0, 2], [3, 0]], [0.1, 1.0]),
              ([[1, 1], [2, 1]], [0.7, 0.9]),
              ([[2, 0], [1, 2]], [1.0, 0.3]),
              ([[0, 1], [2, 3]], [2./3, 1.0]),
              ([[1, 0], [1, 4]], [1.0, 1./3]),
          )
          for table, expected in cases:
              observed = [stats.fisher_exact(table, alternative=side)[1]
                          for side in ("less", "greater")]
              assert_allclose(observed, expected, atol=0, rtol=1e-7)

      def test_gh3014(self):
          # check if issue #3014 has been fixed.
          # before, this would have raised a ValueError
          table = [[1, 2], [9, 84419233]]
          statistic, pvalue = stats.fisher_exact(table)

      @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
      def test_result(self, alternative):
          # The result's attributes must agree with its tuple form.
          table = np.array([[14500, 20000], [30000, 40000]])
          result = stats.fisher_exact(table, alternative=alternative)
          as_attributes = (result.statistic, result.pvalue)
          assert_equal(as_attributes, result)

      def test_input_validation_edge_cases_rxc(self):
          gen = np.random.default_rng(2345783457834572345)
          table = np.asarray([[2, 7], [8, 2]])

          # `alternative` cannot be combined with a resampling method
          message = r"`alternative` must be the default \(None\) unless..."
          with pytest.raises(ValueError, match=message):
              method = stats.PermutationMethod(rng=gen)
              stats.fisher_exact(table, method=method, alternative='less')

          # `BootstrapMethod` is not an accepted `method`
          message = "...not recognized; if provided, `method` must be an..."
          with pytest.raises(ValueError, match=message):
              method = stats.BootstrapMethod(rng=gen)
              stats.fisher_exact(table, method=method)

          # a `MonteCarloMethod` carrying `rvs` is rejected
          message = "If the `method` argument of `fisher_exact` is an..."
          with pytest.raises(ValueError, match=message):
              method = stats.MonteCarloMethod(rvs=stats.norm.rvs)
              stats.fisher_exact(table, method=method)

          message = "`table` must have at least one row and one column."
          with pytest.raises(ValueError, match=message):
              stats.fisher_exact(np.zeros((0, 1)))

          # Special case: when there is only one table with given marginals,
          # the PMF of that case is 1.0, so the p-value is 1.0
          for only_table in ([[1, 2, 3]], [[1], [2], [3]], np.zeros((2, 3))):
              np.testing.assert_equal(stats.fisher_exact(only_table), (1, 1))

      @pytest.mark.fail_slow(10)
      @pytest.mark.slow()
      def test_resampling_2x2(self):
          # Resampling p-values for a 2x2 table should be close to the default
          # result; the statistic is compared with the `random_table` PMF.
          gen = np.random.default_rng(2345783457834572345)
          table = np.asarray([[2, 7], [8, 2]])
          reference = stats.fisher_exact(table)
          row_sums, col_sums = table.sum(axis=1), table.sum(axis=0)
          expected_stat = stats.random_table(row_sums, col_sums).pmf(table)

          # Both methods draw from the same generator, in this order.
          for method_class in (stats.MonteCarloMethod, stats.PermutationMethod):
              method = method_class(rng=gen)
              result = stats.fisher_exact(table, method=method)
              assert_allclose(result.pvalue, reference.pvalue, atol=0.0025)
              assert_equal(result.statistic, expected_stat)

      @pytest.mark.fail_slow(10)
      @pytest.mark.slow()
      def test_resampling_rxc(self):
          # Compare against R fisher.test
          # options(digits=16)
          # MP6 <- rbind(
          #     c(1, 2, 2, 1, 1, 0, 1),
          #     c(2, 0, 0, 2, 3, 0, 0),
          #     c(0, 1, 1, 1, 2, 7, 3),
          #     c(1, 1, 2, 0, 0, 0, 1),
          #     c(0, 1, 1, 1, 1, 0, 0))
          # fisher.test(MP6)
          table = np.asarray([[1, 2, 2, 1, 1, 0, 1],
                              [2, 0, 0, 2, 3, 0, 0],
                              [0, 1, 1, 1, 2, 7, 3],
                              [1, 1, 2, 0, 0, 0, 1],
                              [0, 1, 1, 1, 1, 0, 0]])
          r_pvalue = 0.03928964365533
          gen = np.random.default_rng(3928964365533)

          # Both methods draw from the same generator, in this order.
          configurations = (
              (stats.PermutationMethod, {}),
              (stats.MonteCarloMethod, {'n_resamples': 99999}),
          )
          for method_class, options in configurations:
              method = method_class(rng=gen, **options)
              result = stats.fisher_exact(table, method=method)
              assert_allclose(result.pvalue, r_pvalue, atol=5e-4)

      @pytest.mark.xslow()
      def test_resampling_exact_2x2(self):
          # Test that exact permutation p-value matches result of `fisher_exact`
          gen = np.random.default_rng(2345783457834572345)
          method = stats.PermutationMethod(rng=gen)
          cell_values = ((a, b, c, d)
                         for a in range(1, 3)
                         for b in range(1, 3)
                         for c in range(1, 3)
                         for d in range(1, 4))
          for a, b, c, d in cell_values:
              table = np.asarray([[a, b], [c, d]])
              reference = stats.fisher_exact(table)
              result = stats.fisher_exact(table, method=method)
              assert_allclose(result.pvalue, reference.pvalue, atol=1e-14)
  class TestCorrSpearmanr:
      """ W.II.D. Compute a correlation matrix on all the variables.

          All the correlations, except for ZERO and MISS, should be exactly 1.
          ZERO and MISS should have undefined or missing correlations with the
          other variables.  The same should go for SPEARMAN correlations, if
          your program has them.

          This class checks `stats.spearmanr`.
      """

      def setup_method(self):
          # Fresh, seeded generator for every test method
          self.rng = np.random.default_rng(228584263)

      def test_scalar(self):
          # Scalar inputs give an all-NaN result
          result = stats.spearmanr(4., 2.)
          assert_(np.all(np.isnan(result)))

      def test_uneven_lengths(self):
          for other in ([8, 9], 8):
              assert_raises(ValueError, stats.spearmanr, [1, 2, 1], other)

      def test_uneven_2d_shapes(self):
          # Different number of columns should work - those just get
          # concatenated.
          x = self.rng.standard_normal((4, 3))
          y = self.rng.standard_normal((4, 2))
          by_columns = stats.spearmanr(x, y)
          assert by_columns.statistic.shape == (5, 5)
          by_rows = stats.spearmanr(x.T, y.T, axis=1)
          assert by_rows.pvalue.shape == (5, 5)

          # ... but not when the mismatch is along the observation axis
          assert_raises(ValueError, stats.spearmanr, x, y, axis=1)
          assert_raises(ValueError, stats.spearmanr, x.T, y.T)

      def test_ndim_too_high(self):
          x = self.rng.standard_normal((4, 3, 2))
          for args in ((x,), (x, x), (x, None, None)):
              assert_raises(ValueError, stats.spearmanr, *args)

          # But should work with axis=None (raveling axes) for two input
          # arrays
          raveled = stats.spearmanr(x, x, axis=None)
          flattened = stats.spearmanr(x.flatten(), x.flatten(), axis=0)
          assert_allclose(raveled, flattened)

      def test_nan_policy(self):
          x = np.append(np.arange(9.), np.nan)
          assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan))
          assert_array_equal(stats.spearmanr(x, x, nan_policy='omit'),
                             (1.0, 0.0))
          for bad_policy in ('raise', 'foobar'):
              assert_raises(ValueError, stats.spearmanr, x, x,
                            nan_policy=bad_policy)

      def test_nan_policy_bug_12458(self):
          gen = np.random.default_rng(8119864466)
          x = gen.random((5, 10))
          k = 6
          x[:, k] = np.nan
          y = np.delete(x, k, axis=1)

          def drop_kth(matrix):
              # Remove column `k` and then row `k`
              return np.delete(np.delete(matrix, k, axis=1), k, axis=0)

          corx, px = stats.spearmanr(x, nan_policy='omit')
          cory, py = stats.spearmanr(y)
          assert_allclose(drop_kth(corx), cory, atol=1e-14)
          assert_allclose(drop_kth(px), py, atol=1e-14)

      def test_nan_policy_bug_12411(self):
          m, n = 5, 10
          x = self.rng.standard_normal((m, n))
          x[1, 0] = np.nan
          x[3, -1] = np.nan
          corr, pvalue = stats.spearmanr(x, axis=1, nan_policy="propagate")

          # Reference: correlate every pair of rows separately
          pairwise = np.empty((m, m))
          for j in range(m):
              for i in range(m):
                  pairwise[j, i] = stats.spearmanr(x[i, :], x[j, :]).statistic
          assert_allclose(corr, pairwise)

      def test_sXX(self):
          assert_approx_equal(stats.spearmanr(X, X)[0], 1.0)

      def test_sXBIG(self):
          assert_approx_equal(stats.spearmanr(X, BIG)[0], 1.0)

      def test_sXLITTLE(self):
          assert_approx_equal(stats.spearmanr(X, LITTLE)[0], 1.0)

      def test_sXHUGE(self):
          assert_approx_equal(stats.spearmanr(X, HUGE)[0], 1.0)

      def test_sXTINY(self):
          assert_approx_equal(stats.spearmanr(X, TINY)[0], 1.0)

      def test_sXROUND(self):
          assert_approx_equal(stats.spearmanr(X, ROUND)[0], 1.0)

      def test_sBIGBIG(self):
          assert_approx_equal(stats.spearmanr(BIG, BIG)[0], 1.0)

      def test_sBIGLITTLE(self):
          assert_approx_equal(stats.spearmanr(BIG, LITTLE)[0], 1.0)

      def test_sBIGHUGE(self):
          assert_approx_equal(stats.spearmanr(BIG, HUGE)[0], 1.0)

      def test_sBIGTINY(self):
          assert_approx_equal(stats.spearmanr(BIG, TINY)[0], 1.0)

      def test_sBIGROUND(self):
          assert_approx_equal(stats.spearmanr(BIG, ROUND)[0], 1.0)

      def test_sLITTLELITTLE(self):
          assert_approx_equal(stats.spearmanr(LITTLE, LITTLE)[0], 1.0)

      def test_sLITTLEHUGE(self):
          assert_approx_equal(stats.spearmanr(LITTLE, HUGE)[0], 1.0)

      def test_sLITTLETINY(self):
          assert_approx_equal(stats.spearmanr(LITTLE, TINY)[0], 1.0)

      def test_sLITTLEROUND(self):
          assert_approx_equal(stats.spearmanr(LITTLE, ROUND)[0], 1.0)

      def test_sHUGEHUGE(self):
          assert_approx_equal(stats.spearmanr(HUGE, HUGE)[0], 1.0)

      def test_sHUGETINY(self):
          assert_approx_equal(stats.spearmanr(HUGE, TINY)[0], 1.0)

      def test_sHUGEROUND(self):
          assert_approx_equal(stats.spearmanr(HUGE, ROUND)[0], 1.0)

      def test_sTINYTINY(self):
          assert_approx_equal(stats.spearmanr(TINY, TINY)[0], 1.0)

      def test_sTINYROUND(self):
          assert_approx_equal(stats.spearmanr(TINY, ROUND)[0], 1.0)

      def test_sROUNDROUND(self):
          assert_approx_equal(stats.spearmanr(ROUND, ROUND)[0], 1.0)

      def test_spearmanr_result_attributes(self):
          result = stats.spearmanr(X, X)
          check_named_results(result, ('correlation', 'pvalue'))
          # `correlation` and `statistic` must hold the same value
          assert_equal(result.correlation, result.statistic)

      def test_1d_vs_2d(self):
          x1 = [1, 2, 3, 4, 5, 6]
          x2 = [1, 2, 3, 4, 6, 5]
          from_pair = stats.spearmanr(x1, x2)
          from_columns = stats.spearmanr(np.transpose([x1, x2]))
          assert_allclose(from_pair, from_columns)

      def test_1d_vs_2d_nans(self):
          # Now the same with NaNs present.  Regression test for gh-9103.
          x1 = [1, np.nan, 3, 4, 5, 6]
          x2 = [1, 2, 3, 4, 6, np.nan]
          columns = np.transpose([x1, x2])
          for nan_policy in ('propagate', 'omit'):
              from_pair = stats.spearmanr(x1, x2, nan_policy=nan_policy)
              from_columns = stats.spearmanr(columns, nan_policy=nan_policy)
              assert_allclose(from_pair, from_columns)

      def test_3cols(self):
          x1 = np.arange(6)
          x2 = -x1
          x3 = np.array([0, 1, 2, 3, 5, 4])
          actual = stats.spearmanr(np.asarray([x1, x2, x3]).T)

          rho = 0.94285714
          expected_corr = np.array([[1, -1, rho],
                                    [-1, 1, -rho],
                                    [rho, -rho, 1]])
          # Only the pairs involving the third column have a nonzero p-value
          p = 0.00480466472
          expected_pvalue = np.array([[0, 0, p],
                                      [0, 0, p],
                                      [p, p, 0]], dtype=float)
          assert_allclose(actual.statistic, expected_corr)
          assert_allclose(actual.pvalue, expected_pvalue)

      def test_gh_9103(self):
          # Regression test for gh-9103.
          nan = np.nan
          x = np.array([[nan, 3.0, 4.0, 5.0, 5.1, 6.0, 9.2],
                        [5.0, nan, 4.1, 4.8, 4.9, 5.0, 4.1],
                        [0.5, 4.0, 7.1, 3.8, 8.0, 5.1, 7.6]]).T
          expected = np.array([[nan, nan, nan],
                               [nan, nan, nan],
                               [nan, nan, 1.]])
          propagated = stats.spearmanr(x, nan_policy='propagate').statistic
          assert_allclose(propagated, expected)

          omitted = stats.spearmanr(x, nan_policy='omit').statistic
          upper_triangle = (omitted[0][1], omitted[0][2], omitted[1][2])
          assert_allclose(upper_triangle,
                          (0.2051957, 0.4857143, -0.4707919), rtol=1e-6)

      def test_gh_8111(self):
          # Regression test for gh-8111 (different result for float/int/bool).
          n = 100
          gen = np.random.RandomState(234568)
          x = gen.rand(n)
          nan_mask = gen.rand(n) > 0.7
          statistics = []

          # bool against float, no nans
          a = (x > .5)
          b = np.array(x)
          statistics.append(stats.spearmanr(a, b, nan_policy='omit').statistic)

          # bool against float with NaNs
          b[nan_mask] = np.nan
          statistics.append(stats.spearmanr(a, b, nan_policy='omit').statistic)

          # int against float with NaNs
          a = a.astype(np.int32)
          statistics.append(stats.spearmanr(a, b, nan_policy='omit').statistic)

          expected = [0.865895477, 0.866100381, 0.866100381]
          assert_allclose(statistics, expected)
  class TestCorrSpearmanr2:
      """Some further tests of the spearmanr function."""

      def test_spearmanr_vs_r(self):
          # Cross-check with R:
          # cor.test(c(1,2,3,4,5),c(5,6,7,8,7),method="spearmanr")
          x1 = [1, 2, 3, 4, 5]
          x2 = [5, 6, 7, 8, 7]
          expected = (0.82078268166812329, 0.088587005313543798)
          res = stats.spearmanr(x1, x2)
          assert_approx_equal(res[0], expected[0])
          assert_approx_equal(res[1], expected[1])

      def test_empty_arrays(self):
          # Empty inputs give a NaN statistic and a NaN p-value
          assert_equal(stats.spearmanr([], []), (np.nan, np.nan))

      def test_normal_draws(self):
          rng = np.random.RandomState(7546)
          x = np.array([rng.normal(loc=1, scale=1, size=500),
                        rng.normal(loc=1, scale=1, size=500)])
          corr = [[1.0, 0.3],
                  [0.3, 1.0]]
          # Multiply the draws by the Cholesky factor of `corr`
          x = np.dot(np.linalg.cholesky(corr), x)
          expected = (0.28659685838743354, 6.579862219051161e-11)
          res = stats.spearmanr(x[0], x[1])
          assert_approx_equal(res[0], expected[0])
          assert_approx_equal(res[1], expected[1])

      def test_corr_1(self):
          assert_approx_equal(stats.spearmanr([1, 1, 2], [1, 1, 2])[0], 1.0)

      def test_nan_policies(self):
          x = np.arange(10.)
          x[9] = np.nan
          # default policy: the NaN propagates to the result
          assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan))
          assert_allclose(stats.spearmanr(x, x, nan_policy='omit'),
                          (1.0, 0))
          assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='raise')
          assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='foobar')

      def test_unequal_lengths(self):
          x = np.arange(10.)
          y = np.arange(20.)
          assert_raises(ValueError, stats.spearmanr, x, y)

      def test_omit_paired_value(self):
          # Omitting the pair that contains a NaN must be equivalent to
          # passing the first three pairs only.
          x1 = [1, 2, 3, 4]
          x2 = [8, 7, 6, np.nan]
          res1 = stats.spearmanr(x1, x2, nan_policy='omit')
          res2 = stats.spearmanr(x1[:3], x2[:3], nan_policy='omit')
          assert_equal(res1, res2)

      def test_gh_issue_6061_windows_overflow(self):
          x = list(range(2000))
          y = list(range(2000))
          y[0], y[9] = y[9], y[0]
          y[10], y[434] = y[434], y[10]
          y[435], y[1509] = y[1509], y[435]
          # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
          #     = 1 - (1 / 500)
          #     = 0.998
          x.append(np.nan)
          y.append(3.0)
          assert_almost_equal(stats.spearmanr(x, y, nan_policy='omit')[0], 0.998)

      def test_tie0(self):
          # with only ties in one or both inputs
          warn_msg = "An input array is constant"
          cases = (
              ([2, 2, 2], [2, 2, 2]),
              ([2, 0, 2], [2, 2, 2]),
              ([2, 2, 2], [2, 0, 2]),
          )
          for x, y in cases:
              # One `pytest.warns` per call, so that every case is required
              # to emit the warning, not just one of them.
              with pytest.warns(stats.ConstantInputWarning, match=warn_msg):
                  r, p = stats.spearmanr(x, y)
              assert_equal(r, np.nan)
              assert_equal(p, np.nan)

      def test_tie1(self):
          # Data
          x = [1.0, 2.0, 3.0, 4.0]
          y = [1.0, 2.0, 2.0, 3.0]
          # Ranks of the data, with tie-handling.
          xr = [1.0, 2.0, 3.0, 4.0]
          yr = [1.0, 2.5, 2.5, 4.0]
          # Result of spearmanr should be the same as applying
          # pearsonr to the ranks.
          sr = stats.spearmanr(x, y)
          pr = stats.pearsonr(xr, yr)
          assert_almost_equal(sr, pr)

      def test_tie2(self):
          # Test tie-handling if inputs contain nan's
          # Data without nan's
          x1 = [1, 2, 2.5, 2]
          y1 = [1, 3, 2.5, 4]
          # Same data with nan's
          x2 = [1, 2, 2.5, 2, np.nan]
          y2 = [1, 3, 2.5, 4, np.nan]

          # Results for two data sets should be the same if nan's are ignored
          sr1 = stats.spearmanr(x1, y1)
          sr2 = stats.spearmanr(x2, y2, nan_policy='omit')
          assert_almost_equal(sr1, sr2)

      def test_ties_axis_1(self):
          # Each array contains at least one constant row
          z1 = np.array([[1, 1, 1, 1], [1, 2, 3, 4]])
          z2 = np.array([[1, 2, 3, 4], [1, 1, 1, 1]])
          z3 = np.array([[1, 1, 1, 1], [1, 1, 1, 1]])
          warn_msg = "An input array is constant"
          for z in (z1, z2, z3):
              # One `pytest.warns` per call, so that every case is required
              # to emit the warning, not just one of them.
              with pytest.warns(stats.ConstantInputWarning, match=warn_msg):
                  r, p = stats.spearmanr(z, axis=1)
              assert_equal(r, np.nan)
              assert_equal(p, np.nan)

      def test_gh_11111(self):
          x = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
          y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587,
                        0.0007535430349118562, 0.0002661781514710257, 0, 0,
                        0.0007835762419683435])
          warn_msg = "An input array is constant"
          with pytest.warns(stats.ConstantInputWarning, match=warn_msg):
              r, p = stats.spearmanr(x, y)
          assert_equal(r, np.nan)
          assert_equal(p, np.nan)

      def test_index_error(self):
          # An out-of-range `axis` must raise ValueError
          x = np.array([1.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
          y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587,
                        0.0007535430349118562, 0.0002661781514710257, 0, 0,
                        0.0007835762419683435])
          assert_raises(ValueError, stats.spearmanr, x, y, axis=2)

      def test_alternative(self):
          # Test alternative parameter

          # Simple test - Based on the above ``test_spearmanr_vs_r``
          x1 = [1, 2, 3, 4, 5]
          x2 = [5, 6, 7, 8, 7]

          # strong positive correlation
          expected = (0.82078268166812329, 0.088587005313543798)

          # correlation > 0 -> large "less" p-value
          res = stats.spearmanr(x1, x2, alternative="less")
          assert_approx_equal(res[0], expected[0])
          assert_approx_equal(res[1], 1 - (expected[1] / 2))

          # correlation > 0 -> small "greater" p-value
          res = stats.spearmanr(x1, x2, alternative="greater")
          assert_approx_equal(res[0], expected[0])
          assert_approx_equal(res[1], expected[1] / 2)

          with pytest.raises(ValueError, match="`alternative` must be 'less'..."):
              stats.spearmanr(x1, x2, alternative="ekki-ekki")

      @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater'))
      def test_alternative_nan_policy(self, alternative):
          # Test nan policies
          x1 = [1, 2, 3, 4, 5]
          x2 = [5, 6, 7, 8, 7]
          x1nan = x1 + [np.nan]
          x2nan = x2 + [np.nan]

          # test nan_policy="propagate"
          assert_array_equal(stats.spearmanr(x1nan, x2nan), (np.nan, np.nan))

          # test nan_policy="omit"
          res_actual = stats.spearmanr(x1nan, x2nan, nan_policy='omit',
                                       alternative=alternative)
          res_expected = stats.spearmanr(x1, x2, alternative=alternative)
          assert_allclose(res_actual, res_expected)

          # test nan_policy="raise"
          message = 'The input contains nan values'
          with pytest.raises(ValueError, match=message):
              stats.spearmanr(x1nan, x2nan, nan_policy='raise',
                              alternative=alternative)

          # test invalid nan_policy
          message = "nan_policy must be one of..."
          with pytest.raises(ValueError, match=message):
              stats.spearmanr(x1nan, x2nan, nan_policy='ekki-ekki',
                              alternative=alternative)
  1275. # W.II.E. Tabulate X against X, using BIG as a case weight. The values
  1276. # should appear on the diagonal and the total should be 899999955.
  1277. # If the table cannot hold these values, forget about working with
  1278. # census data. You can also tabulate HUGE against TINY. There is no
  1279. # reason a tabulation program should not be able to distinguish
  1280. # different values regardless of their magnitude.
  1281. # I need to figure out how to do this one.
  1282. @pytest.mark.thread_unsafe(reason="fails in parallel")
  1283. def test_kendalltau():
  1284. # For the cases without ties, both variants should give the same
  1285. # result.
  1286. variants = ('b', 'c')
  1287. # case without ties, con-dis equal zero
  1288. x = [5, 2, 1, 3, 6, 4, 7, 8]
  1289. y = [5, 2, 6, 3, 1, 8, 7, 4]
  1290. # Cross-check with exact result from R:
  1291. # cor.test(x,y,method="kendall",exact=1)
  1292. expected = (0.0, 1.0)
  1293. for taux in variants:
  1294. res = stats.kendalltau(x, y, variant=taux)
  1295. assert_approx_equal(res[0], expected[0])
  1296. assert_approx_equal(res[1], expected[1])
  1297. # case without ties, con-dis equal zero
  1298. x = [0, 5, 2, 1, 3, 6, 4, 7, 8]
  1299. y = [5, 2, 0, 6, 3, 1, 8, 7, 4]
  1300. # Cross-check with exact result from R:
  1301. # cor.test(x,y,method="kendall",exact=1)
  1302. expected = (0.0, 1.0)
  1303. for taux in variants:
  1304. res = stats.kendalltau(x, y, variant=taux)
  1305. assert_approx_equal(res[0], expected[0])
  1306. assert_approx_equal(res[1], expected[1])
  1307. # case without ties, con-dis close to zero
  1308. x = [5, 2, 1, 3, 6, 4, 7]
  1309. y = [5, 2, 6, 3, 1, 7, 4]
  1310. # Cross-check with exact result from R:
  1311. # cor.test(x,y,method="kendall",exact=1)
  1312. expected = (-0.14285714286, 0.77261904762)
  1313. for taux in variants:
  1314. res = stats.kendalltau(x, y, variant=taux)
  1315. assert_approx_equal(res[0], expected[0])
  1316. assert_approx_equal(res[1], expected[1])
  1317. # case without ties, con-dis close to zero
  1318. x = [2, 1, 3, 6, 4, 7, 8]
  1319. y = [2, 6, 3, 1, 8, 7, 4]
  1320. # Cross-check with exact result from R:
  1321. # cor.test(x,y,method="kendall",exact=1)
  1322. expected = (0.047619047619, 1.0)
  1323. for taux in variants:
  1324. res = stats.kendalltau(x, y, variant=taux)
  1325. assert_approx_equal(res[0], expected[0])
  1326. assert_approx_equal(res[1], expected[1])
  1327. # simple case without ties
  1328. x = np.arange(10)
  1329. y = np.arange(10)
  1330. # Cross-check with exact result from R:
  1331. # cor.test(x,y,method="kendall",exact=1)
  1332. expected = (1.0, 5.511463844797e-07)
  1333. for taux in variants:
  1334. res = stats.kendalltau(x, y, variant=taux)
  1335. assert_approx_equal(res[0], expected[0])
  1336. assert_approx_equal(res[1], expected[1])
  1337. # swap a couple of values
  1338. b = y[1]
  1339. y[1] = y[2]
  1340. y[2] = b
  1341. # Cross-check with exact result from R:
  1342. # cor.test(x,y,method="kendall",exact=1)
  1343. expected = (0.9555555555555556, 5.511463844797e-06)
  1344. for taux in variants:
  1345. res = stats.kendalltau(x, y, variant=taux)
  1346. assert_approx_equal(res[0], expected[0])
  1347. assert_approx_equal(res[1], expected[1])
  1348. # swap a couple more
  1349. b = y[5]
  1350. y[5] = y[6]
  1351. y[6] = b
  1352. # Cross-check with exact result from R:
  1353. # cor.test(x,y,method="kendall",exact=1)
  1354. expected = (0.9111111111111111, 2.976190476190e-05)
  1355. for taux in variants:
  1356. res = stats.kendalltau(x, y, variant=taux)
  1357. assert_approx_equal(res[0], expected[0])
  1358. assert_approx_equal(res[1], expected[1])
  1359. # same in opposite direction
  1360. x = np.arange(10)
  1361. y = np.arange(10)[::-1]
  1362. # Cross-check with exact result from R:
  1363. # cor.test(x,y,method="kendall",exact=1)
  1364. expected = (-1.0, 5.511463844797e-07)
  1365. for taux in variants:
  1366. res = stats.kendalltau(x, y, variant=taux)
  1367. assert_approx_equal(res[0], expected[0])
  1368. assert_approx_equal(res[1], expected[1])
  1369. # swap a couple of values
  1370. b = y[1]
  1371. y[1] = y[2]
  1372. y[2] = b
  1373. # Cross-check with exact result from R:
  1374. # cor.test(x,y,method="kendall",exact=1)
  1375. expected = (-0.9555555555555556, 5.511463844797e-06)
  1376. for taux in variants:
  1377. res = stats.kendalltau(x, y, variant=taux)
  1378. assert_approx_equal(res[0], expected[0])
  1379. assert_approx_equal(res[1], expected[1])
  1380. # swap a couple more
  1381. b = y[5]
  1382. y[5] = y[6]
  1383. y[6] = b
  1384. # Cross-check with exact result from R:
  1385. # cor.test(x,y,method="kendall",exact=1)
  1386. expected = (-0.9111111111111111, 2.976190476190e-05)
  1387. for taux in variants:
  1388. res = stats.kendalltau(x, y, variant=taux)
  1389. assert_approx_equal(res[0], expected[0])
  1390. assert_approx_equal(res[1], expected[1])
  1391. # Check a case where variants are different
  1392. # Example values found from Kendall (1970).
  1393. # P-value is the same for the both variants
  1394. x = array([1, 2, 2, 4, 4, 6, 6, 8, 9, 9])
  1395. y = array([1, 2, 4, 4, 4, 4, 8, 8, 8, 10])
  1396. expected = 0.85895569
  1397. assert_approx_equal(stats.kendalltau(x, y, variant='b')[0], expected)
  1398. expected = 0.825
  1399. assert_approx_equal(stats.kendalltau(x, y, variant='c')[0], expected)
  1400. # check exception in case of ties and method='exact' requested
  1401. y[2] = y[1]
  1402. assert_raises(ValueError, stats.kendalltau, x, y, method='exact')
  1403. # check exception in case of invalid method keyword
  1404. assert_raises(ValueError, stats.kendalltau, x, y, method='banana')
  1405. # check exception in case of invalid variant keyword
  1406. assert_raises(ValueError, stats.kendalltau, x, y, variant='rms')
  1407. # tau-b with some ties
  1408. # Cross-check with R:
  1409. # cor.test(c(12,2,1,12,2),c(1,4,7,1,0),method="kendall",exact=FALSE)
  1410. x1 = [12, 2, 1, 12, 2]
  1411. x2 = [1, 4, 7, 1, 0]
  1412. expected = (-0.47140452079103173, 0.28274545993277478)
  1413. res = stats.kendalltau(x1, x2)
  1414. assert_approx_equal(res[0], expected[0])
  1415. assert_approx_equal(res[1], expected[1])
  1416. # test for namedtuple attribute results
  1417. attributes = ('correlation', 'pvalue')
  1418. for taux in variants:
  1419. res = stats.kendalltau(x1, x2, variant=taux)
  1420. check_named_results(res, attributes)
  1421. assert_equal(res.correlation, res.statistic)
  1422. # with only ties in one or both inputs in tau-b or tau-c
  1423. for taux in variants:
  1424. assert_equal(stats.kendalltau([2, 2, 2], [2, 2, 2], variant=taux),
  1425. (np.nan, np.nan))
  1426. assert_equal(stats.kendalltau([2, 0, 2], [2, 2, 2], variant=taux),
  1427. (np.nan, np.nan))
  1428. assert_equal(stats.kendalltau([2, 2, 2], [2, 0, 2], variant=taux),
  1429. (np.nan, np.nan))
  1430. # empty arrays provided as input
  1431. with pytest.warns(SmallSampleWarning, match="One or more sample..."):
  1432. assert_equal(stats.kendalltau([], []), (np.nan, np.nan))
  1433. # check with larger arrays
  1434. rng = np.random.RandomState(7546)
  1435. x = np.array([rng.normal(loc=1, scale=1, size=500),
  1436. rng.normal(loc=1, scale=1, size=500)])
  1437. corr = [[1.0, 0.3],
  1438. [0.3, 1.0]]
  1439. x = np.dot(np.linalg.cholesky(corr), x)
  1440. expected = (0.19291382765531062, 1.1337095377742629e-10)
  1441. res = stats.kendalltau(x[0], x[1])
  1442. assert_approx_equal(res[0], expected[0])
  1443. assert_approx_equal(res[1], expected[1])
  1444. # this should result in 1 for taub but not tau-c
  1445. assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='b')[0],
  1446. 1.0)
  1447. assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='c')[0],
  1448. 0.88888888)
  1449. # test nan_policy
  1450. x = np.arange(10.)
  1451. x[9] = np.nan
  1452. assert_array_equal(stats.kendalltau(x, x), (np.nan, np.nan))
  1453. assert_allclose(stats.kendalltau(x, x, nan_policy='omit'),
  1454. (1.0, 5.5114638e-6), rtol=1e-06)
  1455. assert_allclose(stats.kendalltau(x, x, nan_policy='omit', method='asymptotic'),
  1456. (1.0, 0.00017455009626808976), rtol=1e-06)
  1457. assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='raise')
  1458. assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='foobar')
  1459. # test unequal length inputs
  1460. x = np.arange(10.)
  1461. y = np.arange(20.)
  1462. assert_raises(ValueError, stats.kendalltau, x, y)
  1463. # test all ties
  1464. with pytest.warns(SmallSampleWarning, match="One or more sample..."):
  1465. tau, p_value = stats.kendalltau([0], [0])
  1466. assert_equal(np.nan, tau)
  1467. assert_equal(np.nan, p_value)
  1468. # Regression test for GitHub issue #6061 - Overflow on Windows
  1469. x = np.arange(2000, dtype=float)
  1470. x = np.ma.masked_greater(x, 1995)
  1471. y = np.arange(2000, dtype=float)
  1472. y = np.concatenate((y[1000:], y[:1000]))
  1473. assert_(np.isfinite(stats.mstats.kendalltau(x,y)[1]))
  1474. def test_kendalltau_vs_mstats_basic():
  1475. rng = np.random.RandomState(42)
  1476. for s in range(3, 10):
  1477. a = []
  1478. # Generate rankings with ties
  1479. for i in range(s):
  1480. a += [i]*i
  1481. b = list(a)
  1482. rng.shuffle(a)
  1483. rng.shuffle(b)
  1484. expected = mstats_basic.kendalltau(a, b)
  1485. actual = stats.kendalltau(a, b)
  1486. assert_approx_equal(actual[0], expected[0])
  1487. assert_approx_equal(actual[1], expected[1])
  1488. def test_kendalltau_nan_2nd_arg():
  1489. # regression test for gh-6134: nans in the second arg were not handled
  1490. x = [1., 2., 3., 4.]
  1491. y = [np.nan, 2.4, 3.4, 3.4]
  1492. r1 = stats.kendalltau(x, y, nan_policy='omit')
  1493. r2 = stats.kendalltau(x[1:], y[1:])
  1494. assert_allclose(r1.statistic, r2.statistic, atol=1e-15)
  1495. @pytest.mark.thread_unsafe(reason="fails in parallel")
  1496. def test_kendalltau_gh18139_overflow():
  1497. # gh-18139 reported an overflow in `kendalltau` that appeared after
  1498. # SciPy 0.15.1. Check that this particular overflow does not occur.
  1499. # (Test would fail if warning were emitted.)
  1500. import random
  1501. random.seed(6272161)
  1502. classes = [1, 2, 3, 4, 5, 6, 7]
  1503. n_samples = 2 * 10 ** 5
  1504. x = random.choices(classes, k=n_samples)
  1505. y = random.choices(classes, k=n_samples)
  1506. res = stats.kendalltau(x, y)
  1507. # Reference value from SciPy 0.15.1
  1508. assert_allclose(res.statistic, 0.0011816493905730343)
  1509. # Reference p-value from `permutation_test` w/ n_resamples=9999 (default).
  1510. # Expected to be accurate to at least two digits.
  1511. assert_allclose(res.pvalue, 0.4894, atol=2e-3)
  1512. class TestKendallTauAlternative:
  1513. def test_kendalltau_alternative_asymptotic(self):
  1514. # Test alternative parameter, asymptotic method (due to tie)
  1515. # Based on TestCorrSpearman2::test_alternative
  1516. x1 = [1, 2, 3, 4, 5]
  1517. x2 = [5, 6, 7, 8, 7]
  1518. # strong positive correlation
  1519. expected = stats.kendalltau(x1, x2, alternative="two-sided")
  1520. assert expected[0] > 0
  1521. # rank correlation > 0 -> large "less" p-value
  1522. res = stats.kendalltau(x1, x2, alternative="less")
  1523. assert_equal(res[0], expected[0])
  1524. assert_allclose(res[1], 1 - (expected[1] / 2))
  1525. # rank correlation > 0 -> small "greater" p-value
  1526. res = stats.kendalltau(x1, x2, alternative="greater")
  1527. assert_equal(res[0], expected[0])
  1528. assert_allclose(res[1], expected[1] / 2)
  1529. # reverse the direction of rank correlation
  1530. x2.reverse()
  1531. # strong negative correlation
  1532. expected = stats.kendalltau(x1, x2, alternative="two-sided")
  1533. assert expected[0] < 0
  1534. # rank correlation < 0 -> large "greater" p-value
  1535. res = stats.kendalltau(x1, x2, alternative="greater")
  1536. assert_equal(res[0], expected[0])
  1537. assert_allclose(res[1], 1 - (expected[1] / 2))
  1538. # rank correlation < 0 -> small "less" p-value
  1539. res = stats.kendalltau(x1, x2, alternative="less")
  1540. assert_equal(res[0], expected[0])
  1541. assert_allclose(res[1], expected[1] / 2)
  1542. with pytest.raises(ValueError, match="`alternative` must be 'less'..."):
  1543. stats.kendalltau(x1, x2, alternative="ekki-ekki")
  1544. # There are a lot of special cases considered in the calculation of the
  1545. # exact p-value, so we test each separately. We also need to test
  1546. # separately when the observed statistic is in the left tail vs the right
  1547. # tail because the code leverages symmetry of the null distribution; to
  1548. # do that we use the same test case but negate one of the samples.
  1549. # Reference values computed using R cor.test, e.g.
  1550. # options(digits=16)
  1551. # x <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
  1552. # y <- c( 2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8)
  1553. # cor.test(x, y, method = "kendall", alternative = "g")
  1554. alternatives = ('less', 'two-sided', 'greater')
  1555. p_n1 = [np.nan, np.nan, np.nan]
  1556. p_n2 = [1, 1, 0.5]
  1557. p_c0 = [1, 0.3333333333333, 0.1666666666667]
  1558. p_c1 = [0.9583333333333, 0.3333333333333, 0.1666666666667]
  1559. p_no_correlation = [0.5916666666667, 1, 0.5916666666667]
  1560. p_no_correlationb = [0.5475694444444, 1, 0.5475694444444]
  1561. p_n_lt_171 = [0.9624118165785, 0.1194389329806, 0.0597194664903]
  1562. p_n_lt_171b = [0.246236925303, 0.4924738506059, 0.755634083327]
  1563. p_n_lt_171c = [0.9847475308925, 0.03071385306533, 0.01535692653267]
  1564. def exact_test(self, x, y, alternative, rev, stat_expected, p_expected):
  1565. if rev:
  1566. y = -np.asarray(y)
  1567. stat_expected *= -1
  1568. res = stats.kendalltau(x, y, method='exact', alternative=alternative)
  1569. res_expected = stat_expected, p_expected
  1570. assert_allclose(res, res_expected)
  1571. case_R_n1 = (list(zip(alternatives, p_n1, [False]*3))
  1572. + list(zip(alternatives, reversed(p_n1), [True]*3)))
  1573. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n1)
  1574. def test_against_R_n1(self, alternative, p_expected, rev):
  1575. x, y = [1], [2]
  1576. stat_expected = np.nan
  1577. with pytest.warns(SmallSampleWarning, match="One or more sample..."):
  1578. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1579. case_R_n2 = (list(zip(alternatives, p_n2, [False]*3))
  1580. + list(zip(alternatives, reversed(p_n2), [True]*3)))
  1581. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n2)
  1582. def test_against_R_n2(self, alternative, p_expected, rev):
  1583. x, y = [1, 2], [3, 4]
  1584. stat_expected = 0.9999999999999998
  1585. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1586. case_R_c0 = (list(zip(alternatives, p_c0, [False]*3))
  1587. + list(zip(alternatives, reversed(p_c0), [True]*3)))
  1588. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c0)
  1589. def test_against_R_c0(self, alternative, p_expected, rev):
  1590. x, y = [1, 2, 3], [1, 2, 3]
  1591. stat_expected = 1
  1592. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1593. case_R_c1 = (list(zip(alternatives, p_c1, [False]*3))
  1594. + list(zip(alternatives, reversed(p_c1), [True]*3)))
  1595. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c1)
  1596. def test_against_R_c1(self, alternative, p_expected, rev):
  1597. x, y = [1, 2, 3, 4], [1, 2, 4, 3]
  1598. stat_expected = 0.6666666666666667
  1599. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1600. case_R_no_corr = (list(zip(alternatives, p_no_correlation, [False]*3))
  1601. + list(zip(alternatives, reversed(p_no_correlation),
  1602. [True]*3)))
  1603. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_no_corr)
  1604. def test_against_R_no_correlation(self, alternative, p_expected, rev):
  1605. x, y = [1, 2, 3, 4, 5], [1, 5, 4, 2, 3]
  1606. stat_expected = 0
  1607. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1608. case_no_cor_b = (list(zip(alternatives, p_no_correlationb, [False]*3))
  1609. + list(zip(alternatives, reversed(p_no_correlationb),
  1610. [True]*3)))
  1611. @pytest.mark.parametrize("alternative, p_expected, rev", case_no_cor_b)
  1612. def test_against_R_no_correlationb(self, alternative, p_expected, rev):
  1613. x, y = [1, 2, 3, 4, 5, 6, 7, 8], [8, 6, 1, 3, 2, 5, 4, 7]
  1614. stat_expected = 0
  1615. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1616. case_R_lt_171 = (list(zip(alternatives, p_n_lt_171, [False]*3))
  1617. + list(zip(alternatives, reversed(p_n_lt_171), [True]*3)))
  1618. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171)
  1619. def test_against_R_lt_171(self, alternative, p_expected, rev):
  1620. # Data from Hollander & Wolfe (1973), p. 187f.
  1621. # Used from https://rdrr.io/r/stats/cor.test.html
  1622. x = [44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1]
  1623. y = [2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8]
  1624. stat_expected = 0.4444444444444445
  1625. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1626. case_R_lt_171b = (list(zip(alternatives, p_n_lt_171b, [False]*3))
  1627. + list(zip(alternatives, reversed(p_n_lt_171b),
  1628. [True]*3)))
  1629. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171b)
  1630. def test_against_R_lt_171b(self, alternative, p_expected, rev):
  1631. rng = np.random.RandomState(0)
  1632. x = rng.rand(100)
  1633. y = rng.rand(100)
  1634. stat_expected = -0.04686868686868687
  1635. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1636. case_R_lt_171c = (list(zip(alternatives, p_n_lt_171c, [False]*3))
  1637. + list(zip(alternatives, reversed(p_n_lt_171c),
  1638. [True]*3)))
  1639. @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171c)
  1640. def test_against_R_lt_171c(self, alternative, p_expected, rev):
  1641. rng = np.random.RandomState(0)
  1642. x = rng.rand(170)
  1643. y = rng.rand(170)
  1644. stat_expected = 0.1115906717716673
  1645. self.exact_test(x, y, alternative, rev, stat_expected, p_expected)
  1646. case_gt_171 = (list(zip(alternatives, [False]*3)) +
  1647. list(zip(alternatives, [True]*3)))
  1648. @pytest.mark.parametrize("alternative, rev", case_gt_171)
  1649. def test_gt_171(self, alternative, rev):
  1650. rng = np.random.RandomState(0)
  1651. x = rng.rand(400)
  1652. y = rng.rand(400)
  1653. res0 = stats.kendalltau(x, y, method='exact',
  1654. alternative=alternative)
  1655. res1 = stats.kendalltau(x, y, method='asymptotic',
  1656. alternative=alternative)
  1657. assert_equal(res0[0], res1[0])
  1658. assert_allclose(res0[1], res1[1], rtol=1e-3)
  1659. @pytest.mark.parametrize("method", ('exact', 'asymptotic'))
  1660. @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater'))
  1661. def test_nan_policy(self, method, alternative):
  1662. # Test nan policies
  1663. x1 = [1, 2, 3, 4, 5]
  1664. x2 = [5, 6, 7, 8, 9]
  1665. x1nan = x1 + [np.nan]
  1666. x2nan = x2 + [np.nan]
  1667. # test nan_policy="propagate"
  1668. res_actual = stats.kendalltau(x1nan, x2nan,
  1669. method=method, alternative=alternative)
  1670. res_expected = (np.nan, np.nan)
  1671. assert_allclose(res_actual, res_expected)
  1672. # test nan_policy="omit"
  1673. res_actual = stats.kendalltau(x1nan, x2nan, nan_policy='omit',
  1674. method=method, alternative=alternative)
  1675. res_expected = stats.kendalltau(x1, x2, method=method,
  1676. alternative=alternative)
  1677. assert_allclose(res_actual, res_expected)
  1678. # test nan_policy="raise"
  1679. message = 'The input contains nan values'
  1680. with pytest.raises(ValueError, match=message):
  1681. stats.kendalltau(x1nan, x2nan, nan_policy='raise',
  1682. method=method, alternative=alternative)
  1683. # test invalid nan_policy
  1684. message = "nan_policy must be one of..."
  1685. with pytest.raises(ValueError, match=message):
  1686. stats.kendalltau(x1nan, x2nan, nan_policy='ekki-ekki',
  1687. method=method, alternative=alternative)
  1688. def test_weightedtau():
  1689. x = [12, 2, 1, 12, 2]
  1690. y = [1, 4, 7, 1, 0]
  1691. tau, p_value = stats.weightedtau(x, y)
  1692. assert_approx_equal(tau, -0.56694968153682723)
  1693. assert_equal(np.nan, p_value)
  1694. tau, p_value = stats.weightedtau(x, y, additive=False)
  1695. assert_approx_equal(tau, -0.62205716951801038)
  1696. assert_equal(np.nan, p_value)
  1697. # This must be exactly Kendall's tau
  1698. tau, p_value = stats.weightedtau(x, y, weigher=lambda x: 1)
  1699. assert_approx_equal(tau, -0.47140452079103173)
  1700. assert_equal(np.nan, p_value)
  1701. # test for namedtuple attribute results
  1702. res = stats.weightedtau(x, y)
  1703. attributes = ('correlation', 'pvalue')
  1704. check_named_results(res, attributes)
  1705. assert_equal(res.correlation, res.statistic)
  1706. # Asymmetric, ranked version
  1707. tau, p_value = stats.weightedtau(x, y, rank=None)
  1708. assert_approx_equal(tau, -0.4157652301037516)
  1709. assert_equal(np.nan, p_value)
  1710. tau, p_value = stats.weightedtau(y, x, rank=None)
  1711. assert_approx_equal(tau, -0.7181341329699029)
  1712. assert_equal(np.nan, p_value)
  1713. tau, p_value = stats.weightedtau(x, y, rank=None, additive=False)
  1714. assert_approx_equal(tau, -0.40644850966246893)
  1715. assert_equal(np.nan, p_value)
  1716. tau, p_value = stats.weightedtau(y, x, rank=None, additive=False)
  1717. assert_approx_equal(tau, -0.83766582937355172)
  1718. assert_equal(np.nan, p_value)
  1719. tau, p_value = stats.weightedtau(x, y, rank=False)
  1720. assert_approx_equal(tau, -0.51604397940261848)
  1721. assert_equal(np.nan, p_value)
  1722. # This must be exactly Kendall's tau
  1723. tau, p_value = stats.weightedtau(x, y, rank=True, weigher=lambda x: 1)
  1724. assert_approx_equal(tau, -0.47140452079103173)
  1725. assert_equal(np.nan, p_value)
  1726. tau, p_value = stats.weightedtau(y, x, rank=True, weigher=lambda x: 1)
  1727. assert_approx_equal(tau, -0.47140452079103173)
  1728. assert_equal(np.nan, p_value)
  1729. # Test argument conversion
  1730. tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64), y)
  1731. assert_approx_equal(tau, -0.56694968153682723)
  1732. tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.int16), y)
  1733. assert_approx_equal(tau, -0.56694968153682723)
  1734. tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64),
  1735. np.asarray(y, dtype=np.float64))
  1736. assert_approx_equal(tau, -0.56694968153682723)
  1737. # All ties
  1738. with pytest.warns(SmallSampleWarning, match="One or more sample..."):
  1739. tau, p_value = stats.weightedtau([], [])
  1740. assert_equal(np.nan, tau)
  1741. assert_equal(np.nan, p_value)
  1742. with pytest.warns(SmallSampleWarning, match="One or more sample..."):
  1743. tau, p_value = stats.weightedtau([0], [0])
  1744. assert_equal(np.nan, tau)
  1745. assert_equal(np.nan, p_value)
  1746. # Size mismatches
  1747. assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1, 2])
  1748. assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1], [0, 1, 2])
  1749. # NaNs
  1750. x = [12, 2, 1, 12, 2]
  1751. y = [1, 4, 7, 1, np.nan]
  1752. tau, p_value = stats.weightedtau(x, y)
  1753. assert_approx_equal(tau, -0.56694968153682723)
  1754. x = [12, 2, np.nan, 12, 2]
  1755. tau, p_value = stats.weightedtau(x, y)
  1756. assert_approx_equal(tau, -0.56694968153682723)
  1757. # NaNs when the dtype of x and y are all np.float64
  1758. x = [12.0, 2.0, 1.0, 12.0, 2.0]
  1759. y = [1.0, 4.0, 7.0, 1.0, np.nan]
  1760. tau, p_value = stats.weightedtau(x, y)
  1761. assert_approx_equal(tau, -0.56694968153682723)
  1762. x = [12.0, 2.0, np.nan, 12.0, 2.0]
  1763. tau, p_value = stats.weightedtau(x, y)
  1764. assert_approx_equal(tau, -0.56694968153682723)
  1765. # NaNs when there are more than one NaN in x or y
  1766. x = [12.0, 2.0, 1.0, 12.0, 1.0]
  1767. y = [1.0, 4.0, 7.0, 1.0, 1.0]
  1768. tau, p_value = stats.weightedtau(x, y)
  1769. assert_approx_equal(tau, -0.6615242347139803)
  1770. x = [12.0, 2.0, np.nan, 12.0, np.nan]
  1771. tau, p_value = stats.weightedtau(x, y)
  1772. assert_approx_equal(tau, -0.6615242347139803)
  1773. y = [np.nan, 4.0, 7.0, np.nan, np.nan]
  1774. tau, p_value = stats.weightedtau(x, y)
  1775. assert_approx_equal(tau, -0.6615242347139803)
  1776. def test_segfault_issue_9710():
  1777. # https://github.com/scipy/scipy/issues/9710
  1778. # This test was created to check segfault
  1779. # In issue SEGFAULT only repros in optimized builds after calling the function twice
  1780. message = "One or more sample arguments is too small"
  1781. with pytest.warns(SmallSampleWarning, match=message):
  1782. stats.weightedtau([1], [1.0])
  1783. stats.weightedtau([1], [1.0])
  1784. # The code below also caused SEGFAULT
  1785. stats.weightedtau([np.nan], [52])
  1786. def test_kendall_tau_large():
  1787. n = 172
  1788. # Test omit policy
  1789. x = np.arange(n + 1).astype(float)
  1790. y = np.arange(n + 1).astype(float)
  1791. y[-1] = np.nan
  1792. _, pval = stats.kendalltau(x, y, method='exact', nan_policy='omit')
  1793. assert_equal(pval, 0.0)
  1794. def test_weightedtau_vs_quadratic():
  1795. # Trivial quadratic implementation, all parameters mandatory
  1796. def wkq(x, y, rank, weigher, add):
  1797. tot = conc = disc = u = v = 0
  1798. for (i, j) in product(range(len(x)), range(len(x))):
  1799. w = weigher(rank[i]) + weigher(rank[j]) if add \
  1800. else weigher(rank[i]) * weigher(rank[j])
  1801. tot += w
  1802. if x[i] == x[j]:
  1803. u += w
  1804. if y[i] == y[j]:
  1805. v += w
  1806. if x[i] < x[j] and y[i] < y[j] or x[i] > x[j] and y[i] > y[j]:
  1807. conc += w
  1808. elif x[i] < x[j] and y[i] > y[j] or x[i] > x[j] and y[i] < y[j]:
  1809. disc += w
  1810. return (conc - disc) / np.sqrt(tot - u) / np.sqrt(tot - v)
  1811. def weigher(x):
  1812. return 1. / (x + 1)
  1813. rng = np.random.default_rng(42)
  1814. for s in range(3,10):
  1815. a = []
  1816. # Generate rankings with ties
  1817. for i in range(s):
  1818. a += [i]*i
  1819. b = list(a)
  1820. rng.shuffle(a)
  1821. rng.shuffle(b)
  1822. # First pass: use element indices as ranks
  1823. rank = np.arange(len(a), dtype=np.intp)
  1824. for _ in range(2):
  1825. for add in [True, False]:
  1826. expected = wkq(a, b, rank, weigher, add)
  1827. actual = stats.weightedtau(a, b, rank, weigher, add).statistic
  1828. assert_approx_equal(expected, actual)
  1829. # Second pass: use a random rank
  1830. rng.shuffle(rank)
  1831. class TestRegression:
  1832. def test_linregressBIGX(self):
  1833. # W.II.F. Regress BIG on X.
  1834. result = stats.linregress(X, BIG)
  1835. assert_almost_equal(result.intercept, 99999990)
  1836. assert_almost_equal(result.rvalue, 1.0)
  1837. # The uncertainty ought to be almost zero
  1838. # since all points lie on a line
  1839. assert_almost_equal(result.stderr, 0.0)
  1840. assert_almost_equal(result.intercept_stderr, 0.0)
  1841. def test_regressXX(self):
  1842. # W.IV.B. Regress X on X.
  1843. # The constant should be exactly 0 and the regression coefficient
  1844. # should be 1. This is a perfectly valid regression and the
  1845. # program should not complain.
  1846. result = stats.linregress(X, X)
  1847. assert_almost_equal(result.intercept, 0.0)
  1848. assert_almost_equal(result.rvalue, 1.0)
  1849. # The uncertainly on regression through two points ought to be 0
  1850. assert_almost_equal(result.stderr, 0.0)
  1851. assert_almost_equal(result.intercept_stderr, 0.0)
  1852. # W.IV.C. Regress X on BIG and LITTLE (two predictors). The program
  1853. # should tell you that this model is "singular" because BIG and
  1854. # LITTLE are linear combinations of each other. Cryptic error
  1855. # messages are unacceptable here. Singularity is the most
  1856. # fundamental regression error.
  1857. #
  1858. # Need to figure out how to handle multiple linear regression.
  1859. # This is not obvious
  1860. def test_regressZEROX(self):
  1861. # W.IV.D. Regress ZERO on X.
  1862. # The program should inform you that ZERO has no variance or it should
  1863. # go ahead and compute the regression and report a correlation and
  1864. # total sum of squares of exactly 0.
  1865. result = stats.linregress(X, ZERO)
  1866. assert_almost_equal(result.intercept, 0.0)
  1867. with pytest.warns(stats.ConstantInputWarning, match="An input array..."):
  1868. ref_rvalue = stats.pearsonr(X, ZERO).statistic
  1869. assert_almost_equal(result.rvalue, ref_rvalue)
  1870. def test_regress_simple(self):
  1871. # Regress a line with sinusoidal noise.
  1872. x = np.linspace(0, 100, 100)
  1873. y = 0.2 * np.linspace(0, 100, 100) + 10
  1874. y += np.sin(np.linspace(0, 20, 100))
  1875. result = stats.linregress(x, y)
  1876. lr = LinregressResult
  1877. assert_(isinstance(result, lr))
  1878. assert_almost_equal(result.stderr, 2.3957814497838803e-3)
  1879. def test_regress_alternative(self):
  1880. # test alternative parameter
  1881. x = np.linspace(0, 100, 100)
  1882. y = 0.2 * np.linspace(0, 100, 100) + 10 # slope is greater than zero
  1883. y += np.sin(np.linspace(0, 20, 100))
  1884. with pytest.raises(ValueError, match="`alternative` must be 'less'..."):
  1885. stats.linregress(x, y, alternative="ekki-ekki")
  1886. res1 = stats.linregress(x, y, alternative="two-sided")
  1887. # slope is greater than zero, so "less" p-value should be large
  1888. res2 = stats.linregress(x, y, alternative="less")
  1889. assert_allclose(res2.pvalue, 1 - (res1.pvalue / 2))
  1890. # slope is greater than zero, so "greater" p-value should be small
  1891. res3 = stats.linregress(x, y, alternative="greater")
  1892. assert_allclose(res3.pvalue, res1.pvalue / 2)
  1893. assert res1.rvalue == res2.rvalue == res3.rvalue
  1894. def test_regress_against_R(self):
  1895. # test against R `lm`
  1896. # options(digits=16)
  1897. # x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
  1898. # y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
  1899. # relation <- lm(y~x)
  1900. # print(summary(relation))
  1901. x = [151, 174, 138, 186, 128, 136, 179, 163, 152, 131]
  1902. y = [63, 81, 56, 91, 47, 57, 76, 72, 62, 48]
  1903. res = stats.linregress(x, y, alternative="two-sided")
  1904. # expected values from R's `lm` above
  1905. assert_allclose(res.slope, 0.6746104491292)
  1906. assert_allclose(res.intercept, -38.4550870760770)
  1907. assert_allclose(res.rvalue, np.sqrt(0.95478224775))
  1908. assert_allclose(res.pvalue, 1.16440531074e-06)
  1909. assert_allclose(res.stderr, 0.0519051424731)
  1910. assert_allclose(res.intercept_stderr, 8.0490133029927)
  1911. def test_linregress(self):
  1912. # compared with multivariate ols with pinv
  1913. x = np.arange(11)
  1914. y = np.arange(5, 16)
  1915. y[[(1), (-2)]] -= 1
  1916. y[[(0), (-1)]] += 1
  1917. result = stats.linregress(x, y)
  1918. # This test used to use 'assert_array_almost_equal' but its
  1919. # formulation got confusing since LinregressResult became
  1920. # _lib._bunch._make_tuple_bunch instead of namedtuple
  1921. # (for backwards compatibility, see PR #12983)
  1922. def assert_ae(x, y):
  1923. return assert_almost_equal(x, y, decimal=14)
  1924. assert_ae(result.slope, 1.0)
  1925. assert_ae(result.intercept, 5.0)
  1926. assert_ae(result.rvalue, 0.98229948625750)
  1927. assert_ae(result.pvalue, 7.45259691e-008)
  1928. assert_ae(result.stderr, 0.063564172616372733)
  1929. assert_ae(result.intercept_stderr, 0.37605071654517686)
  1930. def test_regress_simple_negative_cor(self):
  1931. # If the slope of the regression is negative the factor R tend
  1932. # to -1 not 1. Sometimes rounding errors makes it < -1
  1933. # leading to stderr being NaN.
  1934. a, n = 1e-71, 100000
  1935. x = np.linspace(a, 2 * a, n)
  1936. y = np.linspace(2 * a, a, n)
  1937. result = stats.linregress(x, y)
  1938. # Make sure propagated numerical errors
  1939. # did not bring rvalue below -1 (or were coerced)
  1940. assert_(result.rvalue >= -1)
  1941. assert_almost_equal(result.rvalue, -1)
  1942. # slope and intercept stderror should stay numeric
  1943. assert_(not np.isnan(result.stderr))
  1944. assert_(not np.isnan(result.intercept_stderr))
  def test_linregress_result_attributes(self):
      """Check the type and the named fields of the `linregress` result."""
      x = np.linspace(0, 100, 100)
      # A straight line with a sinusoidal wiggle superimposed.
      trend = 0.2 * np.linspace(0, 100, 100) + 10
      y = trend + np.sin(np.linspace(0, 20, 100))
      fit = stats.linregress(x, y)

      # Result is of a correct class
      assert_(isinstance(fit, LinregressResult))

      # LinregressResult elements have correct names
      field_names = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
      check_named_results(fit, field_names)

      # Also check that the extra attribute (intercept_stderr) is present
      assert 'intercept_stderr' in dir(fit)
  1958. def test_regress_two_inputs(self):
  1959. # Regress a simple line formed by two points.
  1960. x = np.arange(2)
  1961. y = np.arange(3, 5)
  1962. result = stats.linregress(x, y)
  1963. # Non-horizontal line
  1964. assert_almost_equal(result.pvalue, 0.0)
  1965. # Zero error through two points
  1966. assert_almost_equal(result.stderr, 0.0)
  1967. assert_almost_equal(result.intercept_stderr, 0.0)
  1968. def test_regress_two_inputs_horizontal_line(self):
  1969. # Regress a horizontal line formed by two points.
  1970. x = np.arange(2)
  1971. y = np.ones(2)
  1972. result = stats.linregress(x, y)
  1973. # Horizontal line
  1974. assert_almost_equal(result.pvalue, 1.0)
  1975. # Zero error through two points
  1976. assert_almost_equal(result.stderr, 0.0)
  1977. assert_almost_equal(result.intercept_stderr, 0.0)
  1978. def test_nist_norris(self):
  1979. # If this causes a lint failure in the future, please note the history of
  1980. # requests to allow extra whitespace in table formatting (e.g. gh-12367).
  1981. # Also see https://github.com/scipy/scipy/wiki/Why-do-we-not-use-an-auto%E2%80%90formatter%3F # noqa: E501
  1982. x = [ 0.2, 337.4, 118.2, 884.6, 10.1, 226.5,
  1983. 666.3, 996.3, 448.6, 777.0, 558.2, 0.4,
  1984. 0.6, 775.5, 666.9, 338.0, 447.5, 11.6,
  1985. 556.0, 228.1, 995.8, 887.6, 120.2, 0.3,
  1986. 0.3, 556.8, 339.1, 887.2, 999.0, 779.0,
  1987. 11.1, 118.3, 229.2, 669.1, 448.9, 0.5]
  1988. y = [ 0.1, 338.8, 118.1, 888.0, 9.2, 228.1,
  1989. 668.5, 998.5, 449.1, 778.9, 559.2, 0.3,
  1990. 0.1, 778.1, 668.8, 339.3, 448.9, 10.8,
  1991. 557.7, 228.3, 998.0, 888.8, 119.6, 0.3,
  1992. 0.6, 557.6, 339.3, 888.0, 998.5, 778.9,
  1993. 10.2, 117.6, 228.9, 668.4, 449.2, 0.2]
  1994. result = stats.linregress(x, y)
  1995. assert_almost_equal(result.slope, 1.00211681802045)
  1996. assert_almost_equal(result.intercept, -0.262323073774029)
  1997. assert_almost_equal(result.rvalue**2, 0.999993745883712)
  1998. assert_almost_equal(result.pvalue, 0.0)
  1999. assert_almost_equal(result.stderr, 0.00042979684820)
  2000. assert_almost_equal(result.intercept_stderr, 0.23281823430153)
  2001. def test_compare_to_polyfit(self):
  2002. x = np.linspace(0, 100, 100)
  2003. y = 0.2 * np.linspace(0, 100, 100) + 10
  2004. y += np.sin(np.linspace(0, 20, 100))
  2005. result = stats.linregress(x, y)
  2006. poly = np.polyfit(x, y, 1) # Fit 1st degree polynomial
  2007. # Make sure linear regression slope and intercept
  2008. # match with results from numpy polyfit
  2009. assert_almost_equal(result.slope, poly[0])
  2010. assert_almost_equal(result.intercept, poly[1])
  def test_empty_input(self):
      """Empty samples trigger a `SmallSampleWarning` and an all-NaN result."""
      with pytest.warns(SmallSampleWarning, match="One or more sample..."):
          fit = stats.linregress([], [])
          assert np.isnan(fit).all()
  def test_nan_input(self):
      """A NaN in the data yields NaN in every field of the result."""
      x = np.arange(10.)
      x[-1] = np.nan  # the array has ten entries, so this is index 9

      with np.errstate(invalid="ignore"):
          fit = stats.linregress(x, x)

      # Make sure the result still comes back as `LinregressResult`
      assert_(isinstance(fit, LinregressResult))
      assert_array_equal(fit, (np.nan,) * 5)
      assert_equal(fit.intercept_stderr, np.nan)
  def test_identical_x(self):
      """All-identical `x` values must raise a `ValueError`."""
      generator = np.random.default_rng(7872425088)
      constant_x = np.zeros(10)
      random_y = generator.random(10)
      with assert_raises(ValueError,
                         match="Cannot calculate a linear regression"):
          stats.linregress(constant_x, random_y)
  def test_theilslopes():
      """Exercise `stats.theilslopes`: estimates, `method` checks, intervals."""
      # Basic slope test.
      short_y = [0, 1, 1]
      slope, intercept, lower, upper = stats.theilslopes(short_y)
      assert_almost_equal(slope, 0.5)
      assert_almost_equal(intercept, 0.5)

      # An unrecognised `method` is rejected with an explanatory message.
      msg = ("method must be either 'joint' or 'separate'."
             "'joint_separate' is invalid.")
      with pytest.raises(ValueError, match=msg):
          stats.theilslopes(short_y, method='joint_separate')

      slope, intercept, lower, upper = stats.theilslopes(short_y,
                                                         method='joint')
      assert_almost_equal(slope, 0.5)
      assert_almost_equal(intercept, 0.0)

      # Test of confidence intervals.
      x = [1, 2, 3, 4, 10, 12, 18]
      y = [9, 15, 19, 20, 45, 55, 78]
      # Only the intercept differs between the two methods here.
      for method, intercept_ref in (('separate', 4.0), ('joint', 6.0)):
          slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07,
                                                             method=method)
          assert_almost_equal(slope, 4)
          assert_almost_equal(intercept, intercept_ref)
          assert_almost_equal(upper, 4.38, decimal=2)
          assert_almost_equal(lower, 3.71, decimal=2)
  def test_cumfreq():
      """Check `stats.cumfreq` counts, extra points and result field names."""
      sample = [1, 4, 2, 1, 3, 1]

      counts, _lower, _width, _extra = stats.cumfreq(sample, numbins=4)
      assert_array_almost_equal(counts, np.array([3., 4., 5., 6.]))

      # With explicit limits, three of the observations lie outside them.
      limits = (1.5, 5)
      _counts, _lower, _width, outside = stats.cumfreq(
          sample, numbins=4, defaultreallimits=limits)
      assert_(outside == 3)

      # test for namedtuple attribute results
      field_names = ('cumcount', 'lowerlimit', 'binsize', 'extrapoints')
      named = stats.cumfreq(sample, numbins=4, defaultreallimits=limits)
      check_named_results(named, field_names)
  def test_relfreq():
      """Check `stats.relfreq` frequencies, field names and list input."""
      values = [1, 4, 2, 1, 3, 1]
      sample = np.array(values)

      from_array, _lower, _width, _extra = stats.relfreq(sample, numbins=4)
      reference = array([0.5, 0.16666667, 0.16666667, 0.16666667])
      assert_array_almost_equal(from_array, reference)

      # test for namedtuple attribute results
      field_names = ('frequency', 'lowerlimit', 'binsize', 'extrapoints')
      check_named_results(stats.relfreq(sample, numbins=4), field_names)

      # check array_like input is accepted
      from_list, _lower, _width, _extra = stats.relfreq(values, numbins=4)
      assert_array_almost_equal(from_array, from_list)
  class TestScoreatpercentile:
      """Tests for `stats.scoreatpercentile`."""

      def setup_method(self):
          # Sample data attached to the instance before each test method.
          self.a1 = [3, 4, 5, 10, -3, -5, 6]
          self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
          self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]

      def test_basic(self):
          """Minimum, maximum and median of an evenly spaced sample."""
          sample = arange(8) * 0.5
          for per, expected in ((0, 0.), (100, 3.5), (50, 1.75)):
              assert_equal(stats.scoreatpercentile(sample, per), expected)

      def test_fraction(self):
          """Median with the 'fraction' interpolation, with and without `limit`."""
          scoreatperc = stats.scoreatpercentile

          # (data, extra keyword arguments, expected median)
          cases = (
              (list(range(10)), {}, 4.5),
              (list(range(10)), {'limit': (2, 7)}, 4.5),
              (list(range(100)), {'limit': (1, 8)}, 4.5),
              (np.array([1, 10, 100]), {'limit': (10, 100)}, 55),
              (np.array([1, 10, 100]), {'limit': (1, 10)}, 5.5),
          )

          # Test defaults
          for data, kwargs, expected in cases:
              assert_equal(scoreatperc(data, 50, **kwargs), expected)

          # explicitly specify interpolation_method 'fraction' (the default)
          for data, kwargs, expected in cases:
              assert_equal(
                  scoreatperc(data, 50, interpolation_method='fraction',
                              **kwargs),
                  expected)

      def test_lower_higher(self):
          """Median with the 'lower' and 'higher' interpolation methods."""
          scoreatperc = stats.scoreatpercentile

          # (data, extra keyword arguments, expected 'lower', expected 'higher')
          cases = (
              (list(range(10)), {}, 4, 5),
              (list(range(10)), {'limit': (2, 7)}, 4, 5),
              (list(range(100)), {'limit': (1, 8)}, 4, 5),
              (np.array([1, 10, 100]), {'limit': (10, 100)}, 10, 100),
              (np.array([1, 10, 100]), {'limit': (1, 10)}, 1, 10),
          )
          for data, kwargs, low, high in cases:
              assert_equal(
                  scoreatperc(data, 50, interpolation_method='lower', **kwargs),
                  low)
              assert_equal(
                  scoreatperc(data, 50, interpolation_method='higher', **kwargs),
                  high)

      def test_sequence_per(self):
          """`per` may be a list or an ndarray of percentiles."""
          sample = arange(8) * 0.5
          reference = np.array([0, 3.5, 1.75])

          from_list = stats.scoreatpercentile(sample, [0, 100, 50])
          assert_allclose(from_list, reference)
          assert_(isinstance(from_list, np.ndarray))

          # Test with ndarray.  Regression test for gh-2861
          from_array = stats.scoreatpercentile(sample, np.array([0, 100, 50]))
          assert_allclose(from_array, reference)

          # Also test combination of 2-D array, axis not None and array-like per
          grid = np.arange(12).reshape((3, 4))
          by_row = stats.scoreatpercentile(grid, np.array([0, 1, 100, 100]),
                                           axis=1)
          by_row_reference = array([[0, 4, 8],
                                    [0.03, 4.03, 8.03],
                                    [3, 7, 11],
                                    [3, 7, 11]])
          assert_allclose(by_row, by_row_reference)

      def test_axis(self):
          """Results and output shapes for the `axis` argument."""
          scoreatperc = stats.scoreatpercentile
          grid = arange(12).reshape(3, 4)
          percentiles = (25, 50, 100)

          # (extra keyword arguments, expected result)
          expectations = (
              ({}, [2.75, 5.5, 11.0]),
              ({'axis': 0}, [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]]),
              ({'axis': 1}, [[0.75, 4.75, 8.75], [1.5, 5.5, 9.5], [3, 7, 11]]),
          )
          for kwargs, expected in expectations:
              assert_equal(scoreatperc(grid, percentiles, **kwargs), expected)

          mostly_ones = array([[1, 1, 1],
                               [1, 1, 1],
                               [4, 4, 3],
                               [1, 1, 1],
                               [1, 1, 1]])

          overall = stats.scoreatpercentile(mostly_ones, 50)
          assert_equal(overall.shape, ())
          assert_equal(overall, 1.0)

          per_column = stats.scoreatpercentile(mostly_ones, 50, axis=0)
          assert_equal(per_column.shape, (3,))
          assert_equal(per_column, [1, 1, 1])

      def test_exception(self):
          """Invalid interpolation names and out-of-range `per` raise."""
          with assert_raises(ValueError):
              stats.scoreatpercentile([1, 2], 56,
                                      interpolation_method='foobar')
          for out_of_range in (101, -1):
              with assert_raises(ValueError):
                  stats.scoreatpercentile([1], out_of_range)

      def test_empty(self):
          """Empty input produces NaN, one per requested percentile."""
          nan = np.nan
          assert_equal(stats.scoreatpercentile([], 50), nan)
          assert_equal(stats.scoreatpercentile(np.array([[], []]), 50), nan)
          assert_equal(stats.scoreatpercentile([], [50, 99]), [nan, nan])
  @make_xp_test_case(stats.mode)
  class TestMode:
      """Tests for `stats.mode`.

      Methods that accept an ``xp`` argument build their inputs with that
      array namespace; the remaining methods use NumPy arrays or plain lists.
      """

      def test_empty(self, xp):
          """An empty 1-D input warns and gives mode NaN with count 0."""
          with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
              vals, counts = stats.mode(xp.asarray([]))
          xp_assert_equal(vals, xp.asarray(xp.nan))
          xp_assert_equal(counts, xp.asarray(0.))

      def test_scalar(self):
          """A scalar input is its own mode, with a count of one."""
          vals, counts = stats.mode(4.)
          assert_equal(vals, np.array([4.]))
          assert_equal(counts, np.array([1]))

      def test_basic(self, xp):
          """The most frequent value (6, three times) is found in 1-D data."""
          data1 = xp.asarray([3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6])
          vals = stats.mode(data1)
          xp_assert_equal(vals[0], xp.asarray(6))
          xp_assert_equal(vals[1], xp.asarray(3))

      def test_axes_keepdims(self, xp):
          """Results for axis=None/0/1 on a 5x4 array with ``keepdims=True``."""
          data1 = [10, 10, 30, 40]
          data2 = [10, 10, 10, 10]
          data3 = [20, 10, 20, 20]
          data4 = [30, 30, 30, 30]
          data5 = [40, 30, 30, 30]
          arr = xp.asarray([data1, data2, data3, data4, data5])

          vals = stats.mode(arr, axis=None, keepdims=True)
          xp_assert_equal(vals[0], xp.asarray([[30]]))
          xp_assert_equal(vals[1], xp.asarray([[8]]))

          vals = stats.mode(arr, axis=0, keepdims=True)
          xp_assert_equal(vals[0], xp.asarray([[10, 10, 30, 30]]))
          xp_assert_equal(vals[1], xp.asarray([[2, 3, 3, 2]]))

          vals = stats.mode(arr, axis=1, keepdims=True)
          xp_assert_equal(vals[0], xp.asarray([[10], [10], [20], [30], [30]]))
          xp_assert_equal(vals[1], xp.asarray([[2], [4], [3], [4], [3]]))

      def test_axes(self, xp):
          """Results for axis=None/0/1 on a 5x4 array (default `keepdims`)."""
          data1 = [10, 10, 30, 40]
          data2 = [10, 10, 10, 10]
          data3 = [20, 10, 20, 20]
          data4 = [30, 30, 30, 30]
          data5 = [40, 30, 30, 30]
          arr = xp.asarray([data1, data2, data3, data4, data5])

          vals = stats.mode(arr, axis=None)
          xp_assert_equal(vals[0], xp.asarray(30))
          xp_assert_equal(vals[1], xp.asarray(8))

          vals = stats.mode(arr, axis=0)
          xp_assert_equal(vals[0], xp.asarray([10, 10, 30, 30]))
          xp_assert_equal(vals[1], xp.asarray([2, 3, 3, 2]))

          vals = stats.mode(arr, axis=1)
          xp_assert_equal(vals[0], xp.asarray([10, 10, 20, 30, 30]))
          xp_assert_equal(vals[1], xp.asarray([2, 4, 3, 4, 3]))

      @pytest.mark.parametrize('axis', range(-4, 0))
      def test_negative_axes_gh_15375(self, axis, xp):
          """A negative axis gives the same result as its positive equivalent."""
          rng = np.random.default_rng(7090348401)
          a = xp.asarray(rng.random((10, 11, 12, 13)))
          res0 = stats.mode(a, axis=a.ndim+axis)
          res1 = stats.mode(a, axis=axis)
          xp_assert_equal(res0.mode, res1.mode)
          xp_assert_equal(res0.count, res1.count)

      def test_mode_result_attributes(self, xp):
          """The result exposes `mode` and `count`, also for empty input."""
          data1 = xp.asarray([3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6])
          data2 = xp.asarray([])
          actual = stats.mode(data1)
          attributes = ('mode', 'count')
          check_named_results(actual, attributes, xp=xp)

          with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
              actual2 = stats.mode(data2)
          check_named_results(actual2, attributes, xp=xp)

      def test_nan_propagate(self, xp):
          """With one NaN present and default options, the mode is still 6."""
          data1 = xp.asarray([3, np.nan, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6])
          actual = stats.mode(data1)
          xp_assert_equal(actual[0], xp.asarray(6, dtype=data1.dtype))
          xp_assert_equal(actual[1], xp.asarray(3))

      @skip_xp_backends(eager_only=True, reason="lazy arrays don't do 'raise'.")
      def test_nan_omit(self, xp):
          """``nan_policy='omit'`` works; 'raise' and unknown policies raise."""
          data1 = xp.asarray([3, np.nan, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6])
          res = stats.mode(data1, nan_policy='omit')
          xp_assert_equal(res.mode, xp.asarray(6.))
          xp_assert_equal(res.count, xp.asarray(3))
          assert_raises(ValueError, stats.mode, data1, nan_policy='raise')
          assert_raises(ValueError, stats.mode, data1, nan_policy='foobar')

      @skip_xp_backends(eager_only=True, reason="lazy arrays don't do 'omit'.")
      @pytest.mark.parametrize("data", [
          [3, 5, 1, 1, 3.],
          [3, np.nan, 5, 1, 1, 3],
          [3, 5, 1.],
          [3, np.nan, 5, 1],
      ])
      @pytest.mark.parametrize('keepdims', [False, True])
      def test_smallest_equal(self, data, keepdims, xp):
          """When several values tie for the top count, the smallest is reported."""
          result = stats.mode(xp.asarray(data), nan_policy='omit', keepdims=keepdims)
          if keepdims:
              xp_assert_equal(result[0][0], xp.asarray(1.))
          else:
              xp_assert_equal(result[0], xp.asarray(1.))

      @pytest.mark.parametrize('axis', range(-3, 3))
      def test_mode_shape_gh_9955(self, axis, xp):
          """With ``keepdims=False`` the reduced axis is dropped from the shape."""
          rng = np.random.default_rng(984213899)
          a = xp.asarray(rng.uniform(size=(3, 4, 5)))
          res = stats.mode(a, axis=axis, keepdims=False)
          reference_shape = list(a.shape)
          reference_shape.pop(axis)
          np.testing.assert_array_equal(res.mode.shape, reference_shape)
          np.testing.assert_array_equal(res.count.shape, reference_shape)

      def test_nan_policy_propagate_gh_9815(self, xp):
          # mode should treat np.nan as it would any other object when
          # nan_policy='propagate'
          a = xp.asarray([2, np.nan, 1, np.nan])
          res = stats.mode(a)
          assert xp.isnan(res.mode) and res.count == 2

      def test_keepdims_empty(self, xp):
          # test empty arrays
          a = xp.zeros((1, 2, 3, 0))

          res = stats.mode(a, axis=1, keepdims=False)
          assert res.mode.shape == res.count.shape == (1, 3, 0)

          res = stats.mode(a, axis=1, keepdims=True)
          assert res.mode.shape == res.count.shape == (1, 1, 3, 0)

      def test_keepdims_nonempty(self, xp):
          # test nan_policy='propagate'
          a = xp.asarray([[1, 3, 3, np.nan], [1, 1, np.nan, 1]])

          res = stats.mode(a, axis=1, keepdims=False)
          xp_assert_equal(res.mode, xp.asarray([3., 1.]))
          xp_assert_equal(res.count, xp.asarray([2, 3]))

          res = stats.mode(a, axis=1, keepdims=True)
          xp_assert_equal(res.mode, xp.asarray([[3.], [1.]]))
          xp_assert_equal(res.count, xp.asarray([[2], [3]]))

          # `a` is already an `xp` array, so it is used directly below.
          # With axis=None the result must match the mode of the raveled data.
          res = stats.mode(a, axis=None, keepdims=False)
          ref = stats.mode(xp_ravel(a), keepdims=False)
          xp_assert_equal(res.mode, ref.mode)
          xp_assert_equal(res.count, ref.count)
          assert res.mode.shape == ref.mode.shape == ()

          res = stats.mode(a, axis=None, keepdims=True)
          ref = stats.mode(xp_ravel(a), keepdims=True)
          xp_assert_equal(xp_ravel(res.mode), xp_ravel(ref.mode))
          assert res.mode.shape == (1, 1)
          xp_assert_equal(xp_ravel(res.count), xp_ravel(ref.count))
          assert res.count.shape == (1, 1)

      def test_keepdims_nan_omit(self):
          # test nan_policy='omit'
          a = [[1, np.nan, np.nan, np.nan, 1],
               [np.nan, np.nan, np.nan, np.nan, 2],
               [1, 2, np.nan, 5, 5]]

          res = stats.mode(a, axis=1, keepdims=False, nan_policy='omit')
          assert_array_equal(res.mode, [1, 2, 5])
          assert_array_equal(res.count, [2, 1, 2])

          res = stats.mode(a, axis=1, keepdims=True, nan_policy='omit')
          assert_array_equal(res.mode, [[1], [2], [5]])
          assert_array_equal(res.count, [[2], [1], [2]])

          # Convert the nested list so that `.ravel()` is available below.
          a = np.array(a)
          res = stats.mode(a, axis=None, keepdims=False, nan_policy='omit')
          ref = stats.mode(a.ravel(), keepdims=False, nan_policy='omit')
          assert_array_equal(res, ref)
          assert res.mode.shape == ref.mode.shape == ()

          res = stats.mode(a, axis=None, keepdims=True, nan_policy='omit')
          ref = stats.mode(a.ravel(), keepdims=True, nan_policy='omit')
          assert_equal(res.mode.ravel(), ref.mode.ravel())
          assert res.mode.shape == (1, 1)
          assert_equal(res.count.ravel(), ref.count.ravel())
          assert res.count.shape == (1, 1)

      @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
      def test_gh16955(self, nan_policy):
          # Check that bug reported in gh-16955 is resolved
          shape = (4, 3)
          data = np.ones(shape)
          data[0, 0] = np.nan
          res = stats.mode(a=data, axis=1, keepdims=False, nan_policy=nan_policy)
          assert_array_equal(res.mode, [1, 1, 1, 1])
          assert_array_equal(res.count, [2, 3, 3, 3])

          # Test with input from gh-16595. Support for non-numeric input
          # was deprecated, so check for the appropriate error.
          my_dtype = np.dtype([('asdf', np.uint8), ('qwer', np.float64, (3,))])
          test = np.zeros(10, dtype=my_dtype)
          message = "Argument `a` is not....|An argument has dtype...|The DType..."
          with pytest.raises(TypeError, match=message):
              stats.mode(test, nan_policy=nan_policy)

      def test_gh9955(self):
          # The behavior of mode with empty slices (whether the input was empty
          # or all elements were omitted) was inconsistent. Test that this is
          # resolved: the mode of an empty slice is NaN and the count is zero.
          with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
              res = stats.mode([])
          ref = (np.nan, 0)
          assert_equal(res, ref)

          with pytest.warns(SmallSampleWarning, match=too_small_1d_omit):
              res = stats.mode([np.nan], nan_policy='omit')
          assert_equal(res, ref)

          a = [[10., 20., 20.], [np.nan, np.nan, np.nan]]
          with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
              res = stats.mode(a, axis=1, nan_policy='omit')
          ref = ([20, np.nan], [2, 0])
          assert_equal(res, ref)

          res = stats.mode(a, axis=1, nan_policy='propagate')
          ref = ([20, np.nan], [2, 3])
          assert_equal(res, ref)

          z = np.array([[], []])
          with pytest.warns(SmallSampleWarning, match=too_small_nd_not_omit):
              res = stats.mode(z, axis=1)
          ref = ([np.nan, np.nan], [0, 0])
          assert_equal(res, ref)

      @pytest.mark.filterwarnings('ignore::RuntimeWarning')  # np.mean warns
      @pytest.mark.parametrize('z', [np.empty((0, 1, 2)), np.empty((1, 1, 2))])
      def test_gh17214(self, z, xp):
          """With axis=None and keepdims=True the output shape matches `xp.mean`."""
          z = xp.asarray(z)
          if z.size == 0:
              with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
                  res = stats.mode(z, axis=None, keepdims=True)
          else:
              res = stats.mode(z, axis=None, keepdims=True)
          ref = xp.mean(z, axis=None, keepdims=True)
          assert res[0].shape == res[1].shape == ref.shape == (1, 1, 1)

      def test_raise_non_numeric_gh18254(self):
          """Object-dtype input is rejected with a `TypeError`."""

          class ArrLike:
              # Minimal array-like whose `__array__` returns an object array.
              def __init__(self, x):
                  self._x = x

              def __array__(self, dtype=None, copy=None):
                  return self._x.astype(object)

          message = ("...only boolean and numerical dtypes..." if SCIPY_ARRAY_API
                     else "Cannot interpret...")
          with pytest.raises(TypeError, match=message):
              stats.mode(ArrLike(np.arange(3)))

          message = ("...only boolean and numerical dtypes..." if SCIPY_ARRAY_API
                     else "Argument `a` is not recognized as numeric.")
          with pytest.raises(TypeError, match=message):
              stats.mode(np.arange(3, dtype=object))
  @make_xp_test_case(stats.sem)
  class TestSEM:
      """Tests for `stats.sem`."""

      testcase = [1., 2., 3., 4.]
      scalar_testcase = 4.

      @pytest.mark.filterwarnings("ignore:invalid value encountered in divide")
      def test_sem_scalar(self, xp):
          """`sem` of a single scalar is NaN."""
          # This is not in R, so used:
          #     sqrt(var(testcase)*3/4)/sqrt(3)
          scalar = xp.asarray(self.scalar_testcase)[()]
          if not is_numpy(xp):
              # Other array types can emit a variety of warnings.
              with warnings.catch_warnings():
                  warnings.simplefilter("ignore", UserWarning)
                  warnings.simplefilter("ignore", RuntimeWarning)
                  result = stats.sem(scalar)
          else:
              with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
                  result = stats.sem(scalar)
          assert xp.isnan(result)

      def test_sem(self, xp):
          """Reference value, `ddof` rescaling and NaN handling."""
          sample = xp.asarray(self.testcase)
          xp_assert_close(stats.sem(sample), xp.asarray(0.6454972244))

          # Results for ddof=0 and ddof=2 differ by the factor sqrt(n/(n-2)).
          n = len(self.testcase)
          rescaled = stats.sem(sample, ddof=0) * (n/(n-2))**0.5
          xp_assert_close(rescaled, stats.sem(sample, ddof=2))

          # Replace the final entry (value 9) with NaN; the result is NaN.
          with_nan = xp.arange(10.)
          with_nan = xp.where(with_nan == 9, xp.nan, with_nan)
          xp_assert_equal(stats.sem(with_nan), xp.asarray(xp.nan))

      @skip_xp_backends(np_only=True,
                        reason='`nan_policy` only supports NumPy backend')
      def test_sem_nan_policy(self, xp):
          """``nan_policy='omit'`` drops the NaN; other policies here raise."""
          with_nan = np.arange(10.)
          with_nan[9] = np.nan
          assert_equal(stats.sem(with_nan, nan_policy='omit'),
                       0.9128709291752769)
          for policy in ('raise', 'foobar'):
              assert_raises(ValueError, stats.sem, with_nan, nan_policy=policy)
  @make_xp_test_case(stats.zmap)
  class TestZmap:
      """Tests for `stats.zmap`."""

      @pytest.mark.parametrize(
          'x, y',
          [([1., 2., 3., 4.], [1., 2., 3., 4.]),
           ([1., 2., 3.], [0., 1., 2., 3., 4.])]
      )
      def test_zmap(self, x, y, xp):
          """Compare with the z-score formula evaluated directly."""
          scores, compare = xp.asarray(x), xp.asarray(y)
          # For these simple cases the expected result follows from
          # (score - mean) / std, with the population standard deviation.
          center = xp.mean(compare)
          spread = xp.std(compare, correction=0)
          xp_assert_close(stats.zmap(scores, compare),
                          (scores - center) / spread)

      def test_zmap_axis(self, xp):
          """Use of the `axis` keyword in zmap."""
          x = xp.asarray([[0.0, 0.0, 1.0, 1.0],
                          [1.0, 1.0, 1.0, 2.0],
                          [2.0, 0.0, 2.0, 0.0]])

          t1 = 1.0/(2.0/3)**0.5
          t2 = 3.**0.5/3
          t3 = 2.**0.5

          # Expected result keyed by the axis passed to zmap.
          references = (
              (0, [[-t1, -t3/2, -t3/2, 0.0],
                   [0.0, t3, -t3/2, t1],
                   [t1, -t3/2, t3, -t1]]),
              (1, [[-1.0, -1.0, 1.0, 1.0],
                   [-t2, -t2, -t2, 3.**0.5],
                   [1.0, -1.0, 1.0, -1.0]]),
          )
          for axis, reference in references:
              xp_assert_close(stats.zmap(x, x, axis=axis),
                              xp.asarray(reference))

      def test_zmap_ddof(self, xp):
          """Use of the `ddof` keyword in zmap."""
          x = xp.asarray([[0.0, 0.0, 1.0, 1.0],
                          [0.0, 1.0, 2.0, 3.0]])

          z = stats.zmap(x, x, axis=1, ddof=1)

          row0_reference = xp.asarray([-0.5, -0.5, 0.5, 0.5])/(1.0/3**0.5)
          row1_reference = xp.asarray([-1.5, -0.5, 0.5, 1.5])/(5./3)**0.5
          xp_assert_close(z[0, :], row0_reference)
          xp_assert_close(z[1, :], row1_reference)

      @pytest.mark.parametrize('ddof', [0, 2])
      def test_zmap_nan_policy_omit(self, ddof, xp):
          # nans in `scores` are propagated, regardless of `nan_policy`.
          # `nan_policy` only affects how nans in `compare` are handled.
          scores = xp.asarray([-3, -1, 2, np.nan])
          compare = xp.asarray([-8, -3, 2, 7, 12, np.nan])
          omitted = stats.zmap(scores, compare, ddof=ddof, nan_policy='omit')
          # exclude nans from compare, don't use isnan + mask since that
          # messes up dask
          reference = stats.zmap(scores, compare[:5], ddof=ddof)
          xp_assert_close(omitted, reference)

      @pytest.mark.parametrize('ddof', [0, 2])
      def test_zmap_nan_policy_omit_with_axis(self, ddof, xp):
          """``nan_policy='omit'`` along ``axis=1`` matches row-wise results."""
          scores = xp.reshape(xp.arange(-5.0, 9.0), (2, -1))
          compare = np.reshape(np.linspace(-8, 6, 24), (2, -1))
          for row, column in ((0, 4), (0, 6), (1, 1)):
              compare[row, column] = np.nan

          # convert from numpy since some libraries like dask
          # can't handle the data-dependent shapes from the isnan masking
          finite_rows = [xp.asarray(compare[row, :][~np.isnan(compare[row, :])])
                         for row in (0, 1)]
          compare = xp.asarray(compare)

          z = stats.zmap(scores, compare, nan_policy='omit', axis=1, ddof=ddof)

          per_row = tuple(stats.zmap(scores[row, :], finite_rows[row], ddof=ddof)
                          for row in (0, 1))
          xp_assert_close(z, xp.stack(per_row))

      @skip_xp_backends(eager_only=True, reason="lazy arrays don't do 'raise'.")
      def test_zmap_nan_policy_raise(self, xp):
          """A NaN in `compare` raises under ``nan_policy='raise'``."""
          scores = xp.asarray([1, 2, 3])
          compare = xp.asarray([-8, -3, 2, 7, 12, xp.nan])
          with pytest.raises(ValueError, match='input contains nan'):
              stats.zmap(scores, compare, nan_policy='raise')

      @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
      @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
      def test_degenerate_input(self, xp):
          """A constant `compare` array gives -inf/nan/inf and a warning."""
          scores = xp.arange(3)
          compare = xp.ones(3)
          reference = xp.asarray([-xp.inf, xp.nan, xp.inf])
          with eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp):
              result = stats.zmap(scores, compare)
          xp_assert_equal(result, reference)

      @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
      def test_complex_gh22404(self, xp):
          """Complex `compare` values are supported (gh-22404)."""
          scores = xp.asarray([1, 2, 3, 4])
          compare = xp.asarray([1, 1j, -1, -1j])
          reference = xp.asarray([1.+0.j, 2.+0.j, 3.+0.j, 4.+0.j])
          xp_assert_close(stats.zmap(scores, compare), reference)
  2539. @make_xp_test_case(stats.zscore)
  2540. class TestZscore:
  def test_zscore(self, xp):
      """Compare `zscore` of [1, 2, 3, 4] with precomputed values."""
      # not in R, so tested by using:
      #    (testcase[i] - mean(testcase, axis=0)) / sqrt(var(testcase) * 3/4)
      sample = xp.asarray([1, 2, 3, 4])
      reference = [-1.3416407864999, -0.44721359549996,
                   0.44721359549996, 1.3416407864999]
      xp_assert_close(stats.zscore(sample), xp.asarray(reference))
  def test_zscore_axis(self, xp):
      """Use of the `axis` keyword in zscore."""
      x = xp.asarray([[0.0, 0.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 2.0],
                      [2.0, 0.0, 2.0, 0.0]])

      t1 = 1.0/(2.0/3)**0.5
      t2 = 3**0.5/3
      t3 = 2**0.5

      # Expected result paired with the axis passed to zscore.
      references = (
          (0, [[-t1, -t3/2, -t3/2, 0.0],
               [0.0, t3, -t3/2, t1],
               [t1, -t3/2, t3, -t1]]),
          (1, [[-1.0, -1.0, 1.0, 1.0],
               [-t2, -t2, -t2, 3**0.5],
               [1.0, -1.0, 1.0, -1.0]]),
      )
      for axis, reference in references:
          xp_assert_close(stats.zscore(x, axis=axis), xp.asarray(reference))
  def test_zscore_ddof(self, xp):
      """Use of the `ddof` keyword in zscore."""
      x = xp.asarray([[0.0, 0.0, 1.0, 1.0],
                      [0.0, 1.0, 2.0, 3.0]])

      z = stats.zscore(x, axis=1, ddof=1)

      row0_reference = xp.asarray([-0.5, -0.5, 0.5, 0.5])/(1.0/3**0.5)
      row1_reference = xp.asarray([-1.5, -0.5, 0.5, 1.5])/((5./3)**0.5)
      xp_assert_close(z[0, :], row0_reference)
      xp_assert_close(z[1, :], row1_reference)
  def test_zscore_nan_propagate(self, xp):
      """With 'propagate', one NaN in the input makes every z-score NaN."""
      sample = xp.asarray([1, 2, np.nan, 4, 5])
      scores = stats.zscore(sample, nan_policy='propagate')
      all_nan = xp.full(sample.shape, xp.nan)
      xp_assert_equal(scores, all_nan)
  def test_zscore_nan_omit(self, xp):
      """With 'omit', the NaN stays NaN and the other entries are scored."""
      sample = xp.asarray([1, 2, xp.nan, 4, 5])

      scores = stats.zscore(sample, nan_policy='omit')

      outer, inner = 1.2649110640673518, 0.6324555320336759
      reference = xp.asarray([-outer, -inner, xp.nan, inner, outer])
      xp_assert_close(scores, reference)
  def test_zscore_nan_omit_with_ddof(self, xp):
      """'omit' with ``ddof=1`` matches scoring the data without the NaN."""
      sample = xp.asarray([xp.nan, 1.0, 3.0, 5.0, 7.0, 9.0])
      scores = stats.zscore(sample, ddof=1, nan_policy='omit')
      # Leading NaN followed by the z-scores of the remaining entries.
      leading_nan = xp.asarray([xp.nan])
      finite_scores = stats.zscore(sample[1:], ddof=1)
      xp_assert_close(scores, xp.concat([leading_nan, finite_scores]))
  @skip_xp_backends(eager_only=True, reason="lazy arrays don't do 'raise'.")
  def test_zscore_nan_raise(self, xp):
      """A NaN in the input raises under ``nan_policy='raise'``."""
      sample = xp.asarray([1, 2, xp.nan, 4, 5])
      expected_message = "The input contains nan..."
      with pytest.raises(ValueError, match=expected_message):
          stats.zscore(sample, nan_policy='raise')
  def test_zscore_constant_input_1d(self, xp):
      """A constant 1-D input warns and gives all-NaN z-scores."""
      constant = xp.asarray([-0.087] * 3)
      with eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp):
          scores = stats.zscore(constant)
      all_nan = xp.full(constant.shape, xp.nan)
      xp_assert_equal(scores, all_nan)
  @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  def test_zscore_constant_input_2d(self, xp):
      """Constant slices of a 2-D input give NaN along each tested axis."""
      x = xp.asarray([[10.0, 10.0, 10.0, 10.0],
                      [10.0, 11.0, 12.0, 13.0]])

      # axis=0: only the first column is constant.
      with eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp):
          by_column = stats.zscore(x, axis=0)
      by_column_reference = xp.asarray([[xp.nan, -1.0, -1.0, -1.0],
                                        [xp.nan, 1.0, 1.0, 1.0]])
      xp_assert_close(by_column, by_column_reference)

      # axis=1: only the first row is constant.
      with eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp):
          by_row = stats.zscore(x, axis=1)
      nan_row = xp.asarray([xp.nan, xp.nan, xp.nan, xp.nan])
      xp_assert_equal(by_row, xp.stack([nan_row, stats.zscore(x[1, :])]))

      # axis=None: same as scoring the flattened data and reshaping back.
      overall = stats.zscore(x, axis=None)
      flat_scores = stats.zscore(xp.reshape(x, (-1,)))
      xp_assert_equal(overall, xp.reshape(flat_scores, x.shape))

      # An entirely constant array with axis=None is all NaN.
      ones = xp.ones((3, 6))
      with eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp):
          overall = stats.zscore(ones, axis=None)
      xp_assert_equal(overall, xp.full_like(ones, xp.nan))
  @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  def test_zscore_constant_input_2d_nan_policy_omit(self, xp):
      """Constant slices still give NaN when NaNs are omitted along either axis."""
      nan = xp.nan
      data = xp.asarray([[10.0, 10.0, 10.0, 10.0],
                         [10.0, 11.0, 12.0, nan],
                         [10.0, 12.0, nan, 10.0]])
      # Constants that appear in the expected scores below.
      root_three_halves = (3/2)**0.5
      root_two = 2**0.5

      def precision_loss():
          return eager_warns(RuntimeWarning, match="Precision loss occurred...", xp=xp)

      with precision_loss():
          by_column = stats.zscore(data, nan_policy='omit', axis=0)
      expected_by_column = xp.asarray([[nan, -root_three_halves, -1.0, nan],
                                       [nan, 0, 1.0, nan],
                                       [nan, root_three_halves, nan, nan]])
      xp_assert_close(by_column, expected_by_column)

      with precision_loss():
          by_row = stats.zscore(data, nan_policy='omit', axis=1)
      expected_by_row = xp.asarray([[nan, nan, nan, nan],
                                    [-root_three_halves, 0, root_three_halves, nan],
                                    [-root_two/2, root_two, nan, -root_two/2]])
      xp_assert_close(by_row, expected_by_row)
  @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  def test_zscore_2d_all_nan_row(self, xp):
      """One row is entirely NaN and scores are taken along axis=1 with 'omit'."""
      nan_row = [np.nan, np.nan, np.nan, np.nan]
      data = xp.asarray([nan_row,
                         [10.0, 10.0, 12.0, 12.0]])
      reference = xp.asarray([nan_row,
                              [-1.0, -1.0, 1.0, 1.0]])
      result = stats.zscore(data, nan_policy='omit', axis=1)
      xp_assert_close(result, reference)
  @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  def test_zscore_2d_all_nan(self, xp):
      """An all-NaN 2-D array scored with axis=None and 'omit' comes back all NaN."""
      all_nan = xp.full((2, 3), xp.nan)
      result = stats.zscore(all_nan, nan_policy='omit', axis=None)
      xp_assert_equal(result, all_nan)
  @pytest.mark.parametrize('x', [np.array([]), np.zeros((3, 0, 5))])
  def test_zscore_empty_input(self, x, xp):
      """zscore of an empty array is compared for equality with the input itself."""
      empty = xp.asarray(x)
      xp_assert_equal(stats.zscore(empty), empty)
  @skip_xp_invalid_arg
  def test_zscore_masked_element_0_gh19039(self, xp):
      """Masking element 0 must not turn every z-score into NaN (see gh-19039)."""
      rng = np.random.default_rng(8675309)
      sample = rng.standard_normal(10)
      masked = np.ma.masked_array(sample, np.zeros_like(sample))
      masked.mask[0] = True

      # Reference scores come from the non-masked elements only.
      reference = stats.zscore(sample[1:])
      assert not np.any(np.isnan(reference))

      # Default axis first, then axis=None.
      for axis_kwargs in ({}, {'axis': None}):
          scores = stats.zscore(masked, **axis_kwargs)
          assert_allclose(scores[1:], reference)

      # When the non-masked elements are identical, the result is NaN.
      masked[1:] = masked[1]
      for axis_kwargs in ({}, {'axis': None}):
          with pytest.warns(RuntimeWarning, match="Precision loss occurred..."):
              scores = stats.zscore(masked, **axis_kwargs)
          assert_equal(scores[1:], np.nan)
  @make_xp_test_case(stats.gzscore)
  class TestGZscore:
      """Tests for `stats.gzscore`."""

      def test_gzscore_normal_array(self, xp):
          """gzscore agrees with log(x / gmean(x)) / log(gstd(x)) computed in NumPy."""
          values = np.asarray([1, 2, 3, 4])
          log_ratio = np.log(values / stats.gmean(values))
          log_spread = np.log(stats.gstd(values, ddof=0))
          reference = log_ratio / log_spread

          result = stats.gzscore(xp.asarray(values))
          # Use the dtype the backend gives a Python float literal.
          float_dtype = xp.asarray(1.).dtype
          xp_assert_close(result, xp.asarray(reference, dtype=float_dtype))

      @skip_xp_invalid_arg
      def test_gzscore_masked_array(self):
          """A masked element is excluded and the result remains a masked array."""
          mask = [0, 0, 1, 0, 0]
          masked_input = np.ma.masked_array(np.array([1, 2, -1, 3, 4]), mask=mask)
          result = stats.gzscore(masked_input)

          # The value under the mask (np.inf) is a placeholder; only unmasked
          # entries and the mask itself are compared.
          reference = np.ma.masked_array(
              [-1.526072095151, -0.194700599824, np.inf, 0.584101799472,
               1.136670895503],
              mask=mask,
          )
          assert_allclose(result.compressed(), reference.compressed())
          assert_allclose(result.mask, reference.mask)
          assert isinstance(result, np.ma.MaskedArray)
  @make_xp_test_case(stats.median_abs_deviation)
  class TestMedianAbsDeviation:
      """Tests for `stats.median_abs_deviation`."""

      def setup_method(self):
          # The two fixtures share their first 23 observations and differ only
          # in the final entry (NaN versus a large value).
          shared = [2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9,
                    3.03, 3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5,
                    3.6, 3.7, 3.7, 3.7, 3.7, 3.77, 5.28]
          self.dat_nan = shared + [np.nan]
          self.dat = shared + [28.95]

      def test_median_abs_deviation(self, xp):
          """Check the MAD of the whole sample and of each column of a 6x4 reshape."""
          overall = stats.median_abs_deviation(xp.asarray(self.dat), axis=None)
          xp_assert_close(overall, xp.asarray(0.355))

          table = xp.reshape(xp.asarray(self.dat), (6, 4))
          per_column = stats.median_abs_deviation(table, axis=0)
          xp_assert_close(per_column, xp.asarray([0.435, 0.5, 0.45, 0.4]))

      def test_mad_nan_omit(self, xp):
          """``nan_policy='omit'`` on the fixture that ends with NaN."""
          sample = xp.asarray(self.dat_nan)
          result = stats.median_abs_deviation(sample, nan_policy='omit')
          xp_assert_close(result, xp.asarray(0.34))

      def test_axis_and_nan(self, xp):
          """With the default policy, a row containing NaN gives NaN for that row."""
          rows = xp.asarray([[1.0, 2.0, 3.0, 4.0, np.nan],
                             [1.0, 4.0, 5.0, 8.0, 9.0]])
          result = stats.median_abs_deviation(rows, axis=1)
          xp_assert_close(result, xp.asarray([np.nan, 3.0]))

      def test_nan_policy_omit_with_inf(self, xp):
          """Input with both NaN and inf under ``nan_policy='omit'``."""
          sample = xp.asarray([1, 3, 4, 6, 99, np.nan, np.inf])
          result = stats.median_abs_deviation(sample, nan_policy='omit')
          xp_assert_close(result, xp.asarray(3.0))

      @pytest.mark.parametrize('axis', [0, 1, 2, None])
      def test_size_zero_with_axis(self, axis, xp):
          """Zero-size input is compared against NaN of the reduced shape."""
          empty = xp.zeros((3, 0, 4))
          # A warning is expected only when reducing over the empty axis
          # (axis=1) or over everything (axis=None).
          if axis in {1, None}:
              context = eager_warns(SmallSampleWarning, match='too small', xp=np)
          else:
              context = contextlib.nullcontext()
          with context:
              result = stats.median_abs_deviation(empty, axis=axis)
          reduced_like = xp.sum(empty, axis=axis)
          xp_assert_close(result, xp.full_like(reduced_like, fill_value=xp.nan))

      @pytest.mark.parametrize('nan_policy, expected',
                               [('omit', [np.nan, 1.5, 1.5]),
                                ('propagate', [np.nan, np.nan, 1.5])])
      def test_nan_policy_with_axis(self, nan_policy, expected, xp):
          """Row-wise MAD under 'omit' and 'propagate' for rows with varying NaN counts."""
          omitting = nan_policy == 'omit'
          if omitting and not is_numpy(xp):
              pytest.skip("nan_policy='omit' with n-d input only supported by NumPy")
          rows = xp.asarray([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
                             [1, 5, 3, 6, np.nan, np.nan],
                             [5, 6, 7, 9, 9, 10]])
          if omitting:
              context = eager_warns(SmallSampleWarning, match="too small", xp=np)
          else:
              context = contextlib.nullcontext()
          with context:
              result = stats.median_abs_deviation(rows, nan_policy=nan_policy, axis=1)
          xp_assert_close(result, xp.asarray(expected))

      @pytest.mark.parametrize('axis, expected',
                               [(1, [2.5, 2.0, 12.0]), (None, 4.5)])
      def test_center_mean_with_nan(self, axis, expected):
          """Use ``center=np.mean`` together with ``nan_policy='omit'``."""
          # nan_policy='omit' with multidimensional input only supported by NumPy
          rows = np.array([[1, 2, 4, 9, np.nan],
                           [0, 1, 1, 1, 12],
                           [-10, -10, -10, 20, 20]])
          result = stats.median_abs_deviation(rows, center=np.mean,
                                              nan_policy='omit', axis=axis)
          xp_assert_close(result, expected, rtol=1e-15, atol=1e-15)

      def test_center_not_callable(self, xp):
          """A non-callable ``center`` raises TypeError mentioning 'callable'."""
          sample = xp.asarray([1, 2, 3, 5])
          with pytest.raises(TypeError, match='callable'):
              stats.median_abs_deviation(sample, center=99)
  2761. def _check_warnings(warn_list, expected_type, expected_len):
  2762. """
  2763. Checks that all of the warnings from a list returned by
  2764. `warnings.catch_all(record=True)` are of the required type and that the list
  2765. contains expected number of warnings.
  2766. """
  2767. assert_equal(len(warn_list), expected_len, "number of warnings")
  2768. for warn_ in warn_list:
  2769. assert_(warn_.category is expected_type)
  @make_xp_test_case(stats.iqr)
  class TestIQR:
      """Tests for `stats.iqr`."""

      @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
      def test_basic(self, dtype, xp):
          """IQR of a shuffled, evenly spaced sample for each floating dtype."""
          x = np.arange(8) * 0.5
          # Seeded generator rather than the global legacy RNG, so the shuffled
          # order is the same on every run.
          np.random.default_rng(8023437563).shuffle(x)
          dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
          xp_assert_equal(stats.iqr(xp.asarray(x, dtype=dtype)),
                          xp.asarray(1.75, dtype=dtype))

      def test_api(self, xp):
          """Smoke test: positional argument combinations are accepted."""
          d = xp.ones((5, 5))
          stats.iqr(d)
          stats.iqr(d, None)
          stats.iqr(d, 1)
          stats.iqr(d, (0, 1))
          stats.iqr(d, None, (10, 90))
          stats.iqr(d, None, (30, 20), 1.0)
          stats.iqr(d, None, (25, 75), 1.5, 'propagate')

      @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no nan_policy')
      def test_api_eager(self, xp):
          """Smoke test: positional calls that also pass non-default `nan_policy`."""
          d = xp.ones((5, 5))
          stats.iqr(d, None, (50, 50), 'normal', 'raise', 'linear')
          stats.iqr(d, None, (25, 75), -0.4, 'omit', 'lower', True)

      @pytest.mark.parametrize('x', [[], np.arange(0)])
      def test_empty(self, x, xp):
          """Empty input gives NaN; the warning is checked via eager_warns."""
          with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
              xp_assert_equal(stats.iqr(xp.asarray(x)), xp.asarray(xp.nan))

      def test_constant(self, xp):
          """IQR is zero along any set of axes over which the data are constant."""
          # Constant array always gives 0
          x = xp.ones((7, 4))
          zero = xp.asarray(0.0)
          xp_assert_equal(stats.iqr(x), zero)
          xp_assert_equal(stats.iqr(x, axis=0), xp.zeros(4))
          xp_assert_equal(stats.iqr(x, axis=1), xp.zeros(7))
          xp_assert_equal(stats.iqr(x, interpolation='linear'), zero)
          xp_assert_equal(stats.iqr(x, interpolation='midpoint'), zero)
          xp_assert_equal(stats.iqr(x, interpolation='nearest'), zero)
          xp_assert_equal(stats.iqr(x, interpolation='lower'), zero)
          xp_assert_equal(stats.iqr(x, interpolation='higher'), zero)

          # 0 only along constant dimensions
          # This also tests much of `axis`
          y = xp.ones((4, 5, 6)) * xp.arange(6.)
          xp_assert_equal(stats.iqr(y, axis=0), xp.zeros((5, 6)))
          xp_assert_equal(stats.iqr(y, axis=1), xp.zeros((4, 6)))
          xp_assert_equal(stats.iqr(y, axis=2), xp.full((4, 5), 2.5))
          xp_assert_equal(stats.iqr(y, axis=(0, 1)), xp.zeros(6))
          xp_assert_equal(stats.iqr(y, axis=(0, 2)), xp.full(5, 3.))
          xp_assert_equal(stats.iqr(y, axis=(1, 2)), xp.full(4, 3.))

      def test_scalarlike(self, xp):
          """0-d and single-element input both have an IQR of zero."""
          x = xp.arange(1.) + 7.0
          xp_assert_equal(stats.iqr(x[0]), xp.asarray(0.0))
          xp_assert_equal(stats.iqr(x), xp.asarray(0.0))
          xp_assert_equal(stats.iqr(x, keepdims=True), xp.asarray([0.0]))

      def test_2D(self, xp):
          """IQR of a 3x5 ``arange`` over each axis and over both axes."""
          x = xp.reshape(xp.arange(15), (3, 5))
          xp_assert_equal(stats.iqr(x), xp.asarray(7.0))
          xp_assert_equal(stats.iqr(x, axis=0), xp.full(5, 5.))
          xp_assert_equal(stats.iqr(x, axis=1), xp.full(3, 2.))
          xp_assert_equal(stats.iqr(x, axis=(0, 1)), xp.asarray(7.0))
          xp_assert_equal(stats.iqr(x, axis=(1, 0)), xp.asarray(7.0))

      def test_axis(self, xp):
          """Tuple, negative, and permuted `axis` values select the same data."""
          # The `axis` keyword is also put through its paces in `test_keepdims`.
          # One seeded generator feeds both random inputs so runs are reproducible.
          rng = np.random.default_rng(5862314079)
          o = rng.normal(size=(71, 23))
          x = np.dstack([o] * 10)  # x.shape = (71, 23, 10)
          o, x = xp.asarray(o), xp.asarray(x)
          q = xp.broadcast_to(stats.iqr(o), (10,))

          xp_assert_equal(stats.iqr(x, axis=(0, 1)), q)
          x = xp.moveaxis(x, -1, 0)  # x.shape = (10, 71, 23)
          xp_assert_equal(stats.iqr(x, axis=(2, 1)), q)
          x = xp_swapaxes(x, 0, 1, xp=xp)  # x.shape = (71, 10, 23)
          xp_assert_equal(stats.iqr(x, axis=(0, 2)), q)
          x = xp_swapaxes(x, 0, 1, xp=xp)  # x.shape = (10, 71, 23)

          xp_assert_equal(stats.iqr(x, axis=(0, 1, 2)),
                          stats.iqr(x, axis=None))
          xp_assert_equal(stats.iqr(x, axis=(0,)),
                          stats.iqr(x, axis=0))

          d = np.arange(3 * 5 * 7 * 11)
          # `d` is still 1-D here, so the shuffle permutes every element before
          # the reshape below.
          rng.shuffle(d)
          d = d.reshape((3, 5, 7, 11))
          d = xp.asarray(d)
          xp_assert_equal(stats.iqr(d, axis=(0, 1, 2))[0],
                          stats.iqr(xp_ravel(d[:, :, :, 0])))
          xp_assert_equal(stats.iqr(d, axis=(0, 1, 3))[1],
                          stats.iqr(xp_ravel(d[:, :, 1, :])))
          xp_assert_equal(stats.iqr(d, axis=(3, 1, -4))[2],
                          stats.iqr(xp_ravel(d[:, :, 2, :])))
          xp_assert_equal(stats.iqr(d, axis=(3, 1, 2))[2],
                          stats.iqr(xp_ravel(d[2, :, :, :])))
          xp_assert_equal(stats.iqr(d, axis=(3, 2))[2, 1],
                          stats.iqr(xp_ravel(d[2, 1, :, :])))
          xp_assert_equal(stats.iqr(d, axis=(1, -2))[2, 1],
                          stats.iqr(xp_ravel(d[2, :, :, 1])))
          xp_assert_equal(stats.iqr(d, axis=(1, 3))[2, 2],
                          stats.iqr(xp_ravel(d[2, :, 2, :])))

          with pytest.raises(AxisError, match='`axis` is out of bounds...'):
              stats.iqr(d, axis=4)
          with pytest.raises(ValueError, match='`axis` must contain only...'):
              stats.iqr(d, axis=(0, 0))

      def test_rng(self, xp):
          """Custom percentile ranges, and validation of invalid `rng` values."""
          x = xp.arange(5)
          xp_assert_equal(stats.iqr(x), xp.asarray(2.))
          xp_assert_equal(stats.iqr(x, rng=(25, 87.5)), xp.asarray(2.5))
          xp_assert_equal(stats.iqr(x, rng=(12.5, 75)), xp.asarray(2.5))
          xp_assert_equal(stats.iqr(x, rng=(10, 50)), xp.asarray(1.6))  # 3-1.4

          message = r"Elements of `rng` must be in the range \[0, 100\]."
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, rng=(0, 101))

          message = "`rng` must not contain NaNs."
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, rng=(np.nan, 25))

          message = "`rng` must be a two element sequence."
          with pytest.raises(TypeError, match=message):
              stats.iqr(x, rng=(0, 50, 60))

      def test_interpolation(self, xp):
          """Each `interpolation` option on odd- and even-length samples."""
          x = xp.arange(5)
          y = xp.arange(4)
          # Default
          xp_assert_equal(stats.iqr(x), xp.asarray(2.))
          xp_assert_equal(stats.iqr(y), xp.asarray(1.5))
          # Linear
          xp_assert_equal(stats.iqr(x, interpolation='linear'), xp.asarray(2.))
          xp_assert_equal(stats.iqr(y, interpolation='linear'), xp.asarray(1.5))
          # Higher
          xp_assert_equal(stats.iqr(x, interpolation='higher'), xp.asarray(2.))
          xp_assert_equal(stats.iqr(x, rng=(25, 80), interpolation='higher'),
                          xp.asarray(3.))
          xp_assert_equal(stats.iqr(y, interpolation='higher'), xp.asarray(2.))
          # Lower (will generally, but not always be the same as higher)
          xp_assert_equal(stats.iqr(x, interpolation='lower'), xp.asarray(2.))
          xp_assert_equal(stats.iqr(x, rng=(25, 80), interpolation='lower'),
                          xp.asarray(2.))
          xp_assert_equal(stats.iqr(y, interpolation='lower'), xp.asarray(2.))
          # Nearest
          xp_assert_equal(stats.iqr(x, interpolation='nearest'), xp.asarray(2.))
          xp_assert_equal(stats.iqr(y, interpolation='nearest'), xp.asarray(1.))
          # Midpoint
          xp_assert_equal(stats.iqr(x, interpolation='midpoint'), xp.asarray(2.))
          xp_assert_equal(stats.iqr(x, rng=(25, 80), interpolation='midpoint'),
                          xp.asarray(2.5))
          xp_assert_equal(stats.iqr(y, interpolation='midpoint'), xp.asarray(2.))

          # Check all method= values new in numpy 1.22.0 are accepted
          for method in ('inverted_cdf', 'averaged_inverted_cdf',
                         'closest_observation', 'interpolated_inverted_cdf',
                         'hazen', 'weibull', 'median_unbiased',
                         'normal_unbiased'):
              stats.iqr(y, interpolation=method)

          with pytest.raises(ValueError, match='`method` must be one of...'):
              stats.iqr(x, interpolation='foobar')

      def test_keepdims(self, xp):
          """Result shapes with and without `keepdims` for several `axis` values."""
          # Also tests most of `axis`
          x = xp.ones((3, 5, 7, 11))
          assert_equal(stats.iqr(x, axis=None, keepdims=False).shape, ())
          assert_equal(stats.iqr(x, axis=2, keepdims=False).shape, (3, 5, 11))
          assert_equal(stats.iqr(x, axis=(0, 1), keepdims=False).shape, (7, 11))
          assert_equal(stats.iqr(x, axis=(0, 3), keepdims=False).shape, (5, 7))
          assert_equal(stats.iqr(x, axis=(1,), keepdims=False).shape, (3, 7, 11))
          assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=False).shape, ())
          assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=False).shape, (7,))

          assert_equal(stats.iqr(x, axis=None, keepdims=True).shape, (1, 1, 1, 1))
          assert_equal(stats.iqr(x, axis=2, keepdims=True).shape, (3, 5, 1, 11))
          assert_equal(stats.iqr(x, axis=(0, 1), keepdims=True).shape, (1, 1, 7, 11))
          assert_equal(stats.iqr(x, axis=(0, 3), keepdims=True).shape, (1, 5, 7, 1))
          assert_equal(stats.iqr(x, axis=(1,), keepdims=True).shape, (3, 1, 7, 11))
          assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=True).shape, (1, 1, 1, 1))
          assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=True).shape,
                       (1, 1, 7, 1))

      def test_nanpolicy(self, xp):
          """Each `nan_policy` value, with and without NaN in the input."""
          x = xp.reshape(xp.arange(15.0), (3, 5))

          # No NaNs
          xp_assert_equal(stats.iqr(x, nan_policy='propagate'), xp.asarray(7.))
          xp_assert_equal(stats.iqr(x, nan_policy='omit'), xp.asarray(7.))
          xp_assert_equal(stats.iqr(x, nan_policy='raise'), xp.asarray(7.))

          # Yes NaNs
          x = xpx.at(x)[1, 2].set(xp.nan)
          xp_assert_equal(stats.iqr(x, nan_policy='propagate'),
                          xp.asarray(xp.nan))
          xp_assert_equal(stats.iqr(x, axis=0, nan_policy='propagate'),
                          xp.asarray([5, 5, xp.nan, 5, 5]))
          xp_assert_equal(stats.iqr(x, axis=1, nan_policy='propagate'),
                          xp.asarray([2, xp.nan, 2]))

          xp_assert_equal(stats.iqr(x, nan_policy='omit'), xp.asarray(7.5))

          message = "The input contains nan values"
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, nan_policy='raise')

          # Bad policy
          message = "nan_policy must be one of..."
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, nan_policy='barfood')

      @pytest.mark.skip_xp_backends(np_only=True,
          reason="nan_policy w/ multidimensional arrays only available w/ NumPy")
      def test_nanpolicy_nd(self, xp):
          """'omit' and 'raise' along an axis of a 2-D array containing one NaN."""
          x = xp.reshape(xp.arange(15.0), (3, 5))
          # Set the NaN the same way the other tests in this class do.
          x = xpx.at(x)[1, 2].set(xp.nan)
          xp_assert_equal(stats.iqr(x, axis=0, nan_policy='omit'),
                          xp.full(5, 5.))
          xp_assert_equal(stats.iqr(x, axis=1, nan_policy='omit'),
                          xp.asarray([2, 2.5, 2]))

          message = "The input contains nan values"
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, axis=0, nan_policy='raise')
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, axis=1, nan_policy='raise')

      def test_scale(self, xp):
          """Numeric and 'normal' `scale`, with and without NaN in the input."""
          x = xp.reshape(xp.arange(15.0), (3, 5))

          # No NaNs
          xp_assert_equal(stats.iqr(x, scale=1.0), xp.asarray(7.))
          xp_assert_close(stats.iqr(x, scale='normal'), xp.asarray(7 / 1.3489795))
          xp_assert_equal(stats.iqr(x, scale=2.0), xp.asarray(3.5))

          # Yes NaNs
          x = xpx.at(x)[1, 2].set(xp.nan)
          nan = xp.asarray(xp.nan)
          xp_assert_equal(stats.iqr(x, scale=1.0, nan_policy='propagate'), nan)
          xp_assert_equal(stats.iqr(x, scale='normal', nan_policy='propagate'), nan)
          xp_assert_equal(stats.iqr(x, scale=2.0, nan_policy='propagate'), nan)

          xp_assert_equal(stats.iqr(x, scale=1.0, nan_policy='omit'),
                          xp.asarray(7.5))
          xp_assert_close(stats.iqr(x, scale='normal', nan_policy='omit'),
                          xp.asarray(7.5 / 1.3489795))
          xp_assert_equal(stats.iqr(x, scale=2.0, nan_policy='omit'),
                          xp.asarray(3.75))

          # Bad scale
          message = "foobar not a valid scale for `iqr`"
          with pytest.raises(ValueError, match=message):
              stats.iqr(x, scale='foobar')

      @pytest.mark.skip_xp_backends(np_only=True,
          reason="nan_policy w/ multidimensional arrays only available w/ NumPy")
      def test_scale_nanpolicy_nd(self, xp):
          """`scale` combined with 'propagate' along axis=1 of a 2-D array."""
          # axis=1 chosen to show behavior with both nans and without
          x = xp.reshape(xp.arange(15.0), (3, 5))
          x = xpx.at(x)[1, 2].set(xp.nan)
          xp_assert_equal(stats.iqr(x, axis=1, scale=1.0, nan_policy='propagate'),
                          xp.asarray([2, np.nan, 2]))
          xp_assert_close(stats.iqr(x, axis=1, scale='normal',
                                    nan_policy='propagate'),
                          xp.asarray([2, np.nan, 2]) / 1.3489795)
          xp_assert_equal(stats.iqr(x, axis=1, scale=2.0, nan_policy='propagate'),
                          xp.asarray([1, np.nan, 1]))

      def test_rng_order(self, xp):
          """The order of the two `rng` percentiles does not change the result."""
          # test that order of `rng` doesn't matter (as documented)
          x = xp.arange(8.) * 0.5
          res = stats.iqr(x, rng=(75, 25))
          ref = stats.iqr(x)
          xp_assert_equal(res, ref)
  @make_xp_test_case(stats.moment)
  class TestMoments:
      """
      Comparison numbers are found using R v.1.5.1
      note that length(testcase) = 4
      testmathworks comes from documentation for the
      Statistics Toolbox for Matlab and can be found at both
      https://www.mathworks.com/help/stats/kurtosis.html
      https://www.mathworks.com/help/stats/skewness.html
      Note that both test cases came from here.
      """
      testcase = [1., 2., 3., 4.]
      scalar_testcase = 4.
      # Class-level generator; used only to build the fixed 42-element sample
      # consumed by `test_moment_accuracy`.
      rng = np.random.default_rng(2285049930)
      testcase_moment_accuracy = rng.random(42)

      @pytest.mark.parametrize('size', [10, (10, 2)])
      @pytest.mark.parametrize('m, c', product((0, 1, 2, 3), (None, 0, 1)))
      @pytest.mark.filterwarnings(
          "ignore:divide by zero encountered in divide:RuntimeWarning:dask"
      )
      def test_moment_center_scalar_moment(self, size, m, c, xp):
          """Scalar `order` with each `center` matches a direct computation."""
          rng = np.random.default_rng(6581432544381372042)
          x = xp.asarray(rng.random(size=size))
          res = stats.moment(x, m, center=c)
          # The reference below uses the mean when no center is given.
          c = xp.mean(x, axis=0) if c is None else c
          ref = xp.sum((x - c)**m, axis=0)/x.shape[0]
          xp_assert_close(res, ref, atol=1e-16)

      @pytest.mark.parametrize('size', [10, (10, 2)])
      @pytest.mark.parametrize('c', (None, 0, 1))
      @pytest.mark.filterwarnings(
          "ignore:divide by zero encountered in divide:RuntimeWarning:dask"
      )
      def test_moment_center_array_moment(self, size, c, xp):
          """A list of orders equals the scalar-order results stacked along axis 0."""
          rng = np.random.default_rng(1706828300224046506)
          x = xp.asarray(rng.random(size=size))
          m = [0, 1, 2, 3]
          res = stats.moment(x, m, center=c)
          ref = xp.concat([stats.moment(x, i, center=c)[xp.newaxis, ...] for i in m])
          xp_assert_equal(res, ref)

      def test_moment(self, xp):
          """Known moments of ``[1, 2, 3, 4]``, `order` validation, and empty input."""
          # mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))
          testcase = xp.asarray(self.testcase)

          y = stats.moment(xp.asarray(self.scalar_testcase))
          xp_assert_close(y, xp.asarray(0.0))

          y = stats.moment(testcase, 0)
          xp_assert_close(y, xp.asarray(1.0))
          y = stats.moment(testcase, 1)
          xp_assert_close(y, xp.asarray(0.0))
          y = stats.moment(testcase, 2)
          xp_assert_close(y, xp.asarray(1.25))
          y = stats.moment(testcase, 3)
          xp_assert_close(y, xp.asarray(0.0))
          y = stats.moment(testcase, 4)
          xp_assert_close(y, xp.asarray(2.5625))

          # check array_like input for moment
          y = stats.moment(testcase, [1, 2, 3, 4])
          xp_assert_close(y, xp.asarray([0, 1.25, 0, 2.5625]))

          # check moment input consists only of integers
          y = stats.moment(testcase, 0.0)
          xp_assert_close(y, xp.asarray(1.0))
          message = 'All elements of `order` must be integral.'
          with pytest.raises(ValueError, match=message):
              stats.moment(testcase, 1.2)
          y = stats.moment(testcase, [1.0, 2, 3, 4.0])
          xp_assert_close(y, xp.asarray([0, 1.25, 0, 2.5625]))

          def check_empty_inputs():
              # Empty and zero-size inputs in 1-D and 2-D.
              y = stats.moment(xp.asarray([]))
              xp_assert_equal(y, xp.asarray(xp.nan))
              y = stats.moment(xp.asarray([], dtype=xp.float32))
              xp_assert_equal(y, xp.asarray(xp.nan, dtype=xp.float32))
              y = stats.moment(xp.zeros((1, 0)), axis=0)
              xp_assert_equal(y, xp.empty((0,)))
              y = stats.moment(xp.asarray([[]]), axis=1)
              xp_assert_equal(y, xp.asarray([xp.nan]))
              y = stats.moment(xp.asarray([[]]), order=[0, 1], axis=0)
              xp_assert_equal(y, xp.empty((2, 0)))

          # Run the empty-input checks; without this call the helper above is
          # dead code and none of its assertions execute.
          # NOTE(review): assumes every eager backend emits SmallSampleWarning
          # for empty input, as the other empty-input tests in this file expect
          # for `skew`, `kurtosis` and `iqr` -- confirm for `moment`.
          with eager_warns(SmallSampleWarning, match="See documentation for...",
                           xp=xp):
              check_empty_inputs()

      def test_nan_policy(self):
          """NumPy-only check of each `nan_policy` value on input ending with NaN."""
          x = np.arange(10.)
          x[9] = np.nan
          assert_equal(stats.moment(x, 2), np.nan)
          assert_almost_equal(stats.moment(x, nan_policy='omit'), 0.0)
          assert_raises(ValueError, stats.moment, x, nan_policy='raise')
          assert_raises(ValueError, stats.moment, x, nan_policy='foobar')

      @pytest.mark.parametrize('dtype', ['float32', 'float64', 'complex128'])
      @pytest.mark.parametrize('expect, order', [(0, 1), (1, 0)])
      def test_constant_moments(self, dtype, expect, order, xp):
          """Orders 0 and 1 give exactly 1 and 0 for any dtype, shape, and axis."""
          if dtype == 'complex128' and is_torch(xp):
              pytest.skip()
          dtype = getattr(xp, dtype)

          rng = np.random.default_rng(3824693518)
          x = xp.asarray(rng.random(5), dtype=dtype)
          y = stats.moment(x, order=order)
          xp_assert_equal(y, xp.asarray(expect, dtype=dtype))

          y = stats.moment(xp.broadcast_to(x, (6, 5)), axis=0, order=order)
          xp_assert_equal(y, xp.full((5,), expect, dtype=dtype))

          y = stats.moment(xp.broadcast_to(x, (1, 2, 3, 4, 5)), axis=2,
                           order=order)
          xp_assert_equal(y, xp.full((1, 2, 4, 5), expect, dtype=dtype))

          y = stats.moment(xp.broadcast_to(x, (1, 2, 3, 4, 5)), axis=None,
                           order=order)
          xp_assert_equal(y, xp.full((), expect, dtype=dtype))

      def test_moment_propagate_nan(self, xp):
          """A NaN in one row gives NaN for that row only."""
          # Check that the shape of the result is the same for inputs
          # with and without nans, cf gh-5817
          a = xp.reshape(xp.arange(8.), (2, -1))
          a = xpx.at(a)[1, 0].set(xp.nan)
          mm = stats.moment(xp.asarray(a), 2, axis=1)
          xp_assert_close(mm, xp.asarray([1.25, xp.nan]), atol=1e-15)

      def test_moment_empty_order(self, xp):
          """An empty `order` list is rejected with ValueError."""
          # tests moment with empty `order` list
          with pytest.raises(ValueError, match=r"`order` must be a scalar or a"
                                               r" non-empty 1D array."):
              stats.moment(xp.asarray([1, 2, 3, 4]), order=[])

      def test_rename_moment_order(self, xp):
          """The legacy keyword `moment` gives the same result as `order`."""
          # Parameter 'order' was formerly known as 'moment'. The old name
          # has not been deprecated, so it must continue to work.
          x = xp.arange(10)
          res = stats.moment(x, moment=3)
          ref = stats.moment(x, order=3)
          xp_assert_equal(res, ref)

      def test_moment_accuracy(self):
          """The 42nd moment agrees with a direct ``numpy.power`` computation."""
          # 'moment' must have a small enough error compared to the slower
          # but very accurate numpy.power() implementation.
          tc_no_mean = (self.testcase_moment_accuracy
                        - np.mean(self.testcase_moment_accuracy))
          assert_allclose(np.power(tc_no_mean, 42).mean(),
                          stats.moment(self.testcase_moment_accuracy, 42))

      @pytest.mark.parametrize('order', [0, 1, 2, 3])
      @pytest.mark.parametrize('axis', [-1, 0, 1])
      @pytest.mark.parametrize('center', [None, 0])
      @pytest.mark.filterwarnings(
          "ignore:divide by zero encountered in divide:RuntimeWarning:dask"
      )
      def test_moment_array_api(self, xp, order, axis, center):
          """The backend result matches `_moment` evaluated on the NumPy data."""
          rng = np.random.default_rng(34823589259425)
          x = rng.random(size=(5, 6, 7))
          res = stats.moment(xp.asarray(x), order, axis=axis, center=center)
          ref = xp.asarray(_moment(x, order, axis, mean=center))
          xp_assert_close(res, ref)
  class SkewKurtosisTest:
      """Fixtures and tests shared by subclasses that define ``stat_fun``."""

      scalar_testcase = 4.
      testcase = [1., 2., 3., 4.]
      testmathworks = [1.165, 0.6268, 0.0751, 0.3516, -0.6965]

      def test_empty_1d(self, xp):
          """``stat_fun`` of an empty 1-D array is NaN; warning checked via eager_warns."""
          empty = xp.asarray([])
          expect_warning = eager_warns(SmallSampleWarning, match=too_small_1d_not_omit,
                                       xp=xp)
          with expect_warning:
              result = self.stat_fun(empty)
          xp_assert_equal(result, xp.asarray(xp.nan))
  @make_xp_test_case(stats.skew)
  class TestSkew(SkewKurtosisTest):
      """Tests for `stats.skew`, plus the tests inherited from the base class."""

      def stat_fun(self, x):
          # Statistic used by the tests inherited from the base class.
          return stats.skew(x)

      @pytest.mark.filterwarnings(
          "ignore:invalid value encountered in scalar divide:RuntimeWarning:dask"
      )
      def test_skewness(self, xp):
          """Reference values for the scalar, MathWorks, and ``[1, 2, 3, 4]`` cases."""
          # Scalar test case
          scalar_skew = stats.skew(xp.asarray(self.scalar_testcase))
          xp_assert_close(scalar_skew, xp.asarray(xp.nan))

          # sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0) /
          #     ((sqrt(var(testmathworks)*4/5))**3)/5
          mathworks = xp.asarray(self.testmathworks)
          xp_assert_close(stats.skew(mathworks),
                          xp.asarray(-0.29322304336607), atol=1e-10)
          xp_assert_close(stats.skew(mathworks, bias=0),
                          xp.asarray(-0.437111105023940), atol=1e-10)

          symmetric_skew = stats.skew(xp.asarray(self.testcase))
          xp_assert_close(symmetric_skew, xp.asarray(0.0), atol=1e-10)

      def test_nan_policy(self):
          """NumPy-only check of each `nan_policy` value on input ending with NaN."""
          # initially, nan_policy is ignored with alternative backends
          sample = np.arange(10.)
          sample[9] = np.nan
          with np.errstate(invalid='ignore'):
              assert_equal(stats.skew(sample), np.nan)
          assert_equal(stats.skew(sample, nan_policy='omit'), 0.)
          for rejected_policy in ('raise', 'foobar'):
              assert_raises(ValueError, stats.skew, sample,
                            nan_policy=rejected_policy)

      def test_skewness_scalar(self):
          """Skewness of ``arange(10)`` compares equal to the scalar 0.0."""
          # `skew` must return a scalar for 1-dim input (only for NumPy arrays)
          result = stats.skew(arange(10))
          assert_equal(result, 0.0)

      def test_skew_propagate_nan(self, xp):
          """A NaN in one row gives NaN for that row only."""
          # Check that the shape of the result is the same for inputs
          # with and without nans, cf gh-5817
          grid = xp.reshape(xp.arange(8.), (2, -1))
          grid = xpx.at(grid)[1, 0].set(xp.nan)
          with np.errstate(invalid='ignore'):
              row_skew = stats.skew(xp.asarray(grid), axis=1)
          xp_assert_equal(row_skew, xp.asarray([0, xp.nan]))

      def test_skew_constant_value(self, xp):
          """Constant or nearly constant input gives NaN (gh-16061, gh-11086)."""
          nan = xp.asarray(xp.nan)

          def assert_nan_skew(sample, **kwargs):
              xp_assert_equal(stats.skew(sample, **kwargs), nan)

          # Skewness of a constant input should be NaN (gh-16061)
          constant = xp.repeat(xp.asarray([-0.27829495]), 10)

          with eager_warns(RuntimeWarning, match="Precision loss occurred", xp=xp):
              assert_nan_skew(constant)
              assert_nan_skew(constant*2.**50)
              assert_nan_skew(constant/2.**50)
              assert_nan_skew(constant, bias=False)

              # similarly, from gh-11086:
              assert_nan_skew(xp.asarray([14.3]*7))
              assert_nan_skew(1. + xp.arange(-3., 4)*1e-16)

      @skip_xp_backends(eager_only=True)
      def test_precision_loss_gh15554(self, xp):
          """A constant column must trigger the precision-loss warning (gh-15554)."""
          # gh-15554 was one of several issues that have reported problems with
          # constant or near-constant input. We can't always fix these, but
          # make sure there's a warning.
          rng = np.random.default_rng(34095309370)
          table = rng.random(size=(100, 10))
          table[:, 0] = 1.01
          with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
              stats.skew(xp.asarray(table))

      @pytest.mark.parametrize('axis', [-1, 0, 2, None])
      @pytest.mark.parametrize('bias', [False, True])
      def test_vectorization(self, xp, axis, bias):
          """Compare against a naive implementation on a 3-D array."""
          # Behavior with array input is barely tested above. Compare
          # against naive implementation.
          rng = np.random.default_rng(1283413549926)
          sample = xp.asarray(rng.random((3, 4, 5)))

          def naive_skew(a, axis, bias):
              # Simple implementation of skewness
              if axis is None:
                  a = xp.reshape(a, (-1,))
                  axis = 0
              centered = a - xp.mean(a, axis=axis, keepdims=True)
              third_moment = xp.mean(centered**3, axis=axis)
              spread = xp.std(a, axis=axis)
              value = third_moment / spread ** 3
              if bias:
                  return value
              n = a.shape[axis]
              return value * (((n - 1.0) * n) ** 0.5 / (n - 2.0))

          result = stats.skew(sample, axis=axis, bias=bias)
          reference = naive_skew(sample, axis=axis, bias=bias)
          xp_assert_close(result, reference)
  3245. @make_xp_test_case(stats.kurtosis)
  3246. class TestKurtosis(SkewKurtosisTest):
  3247. def stat_fun(self, x):
  3248. return stats.kurtosis(x)
  3249. @pytest.mark.filterwarnings("ignore:invalid value encountered in scalar divide")
  3250. def test_kurtosis(self, xp):
  3251. # Scalar test case
  3252. y = stats.kurtosis(xp.asarray(self.scalar_testcase))
  3253. assert xp.isnan(y)
  3254. # sum((testcase-mean(testcase,axis=0))**4,axis=0)
  3255. # / ((sqrt(var(testcase)*3/4))**4)
  3256. # / 4
  3257. #
  3258. # sum((test2-mean(testmathworks,axis=0))**4,axis=0)
  3259. # / ((sqrt(var(testmathworks)*4/5))**4)
  3260. # / 5
  3261. #
  3262. # Set flags for axis = 0 and
  3263. # fisher=0 (Pearson's defn of kurtosis for compatibility with Matlab)
  3264. y = stats.kurtosis(xp.asarray(self.testmathworks), 0, fisher=0, bias=1)
  3265. xp_assert_close(y, xp.asarray(2.1658856802973))
  3266. # Note that MATLAB has confusing docs for the following case
  3267. # kurtosis(x,0) gives an unbiased estimate of Pearson's skewness
  3268. # kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3)
  3269. # The MATLAB docs imply that both should give Fisher's
  3270. y = stats.kurtosis(xp.asarray(self.testmathworks), fisher=0, bias=0)
  3271. xp_assert_close(y, xp.asarray(3.663542721189047))
  3272. y = stats.kurtosis(xp.asarray(self.testcase), 0, 0)
  3273. xp_assert_close(y, xp.asarray(1.64))
  3274. x = xp.arange(10.)
  3275. x = xp.where(x == 8, xp.nan, x)
  3276. xp_assert_equal(stats.kurtosis(x), xp.asarray(xp.nan))
  3277. def test_kurtosis_nan_policy(self):
  3278. # nan_policy only for NumPy right now
  3279. x = np.arange(10.)
  3280. x[9] = np.nan
  3281. assert_almost_equal(stats.kurtosis(x, nan_policy='omit'), -1.230000)
  3282. assert_raises(ValueError, stats.kurtosis, x, nan_policy='raise')
  3283. assert_raises(ValueError, stats.kurtosis, x, nan_policy='foobar')
  3284. def test_kurtosis_array_scalar(self):
  3285. # "array scalars" do not exist in other backends
  3286. assert_equal(type(stats.kurtosis([1, 2, 3])), np.float64)
  3287. def test_kurtosis_propagate_nan(self):
  3288. # nan_policy only for NumPy right now
  3289. # Check that the shape of the result is the same for inputs
  3290. # with and without nans, cf gh-5817
  3291. a = np.arange(8).reshape(2, -1).astype(float)
  3292. a[1, 0] = np.nan
  3293. k = stats.kurtosis(a, axis=1, nan_policy="propagate")
  3294. np.testing.assert_allclose(k, [-1.36, np.nan], atol=1e-15)
  3295. def test_kurtosis_constant_value(self, xp):
  3296. # Kurtosis of a constant input should be NaN (gh-16061)
  3297. a = xp.asarray([-0.27829495]*10)
  3298. with eager_warns(RuntimeWarning, match="Precision loss occurred", xp=xp):
  3299. assert xp.isnan(stats.kurtosis(a, fisher=False))
  3300. assert xp.isnan(stats.kurtosis(a * float(2**50), fisher=False))
  3301. assert xp.isnan(stats.kurtosis(a / float(2**50), fisher=False))
  3302. assert xp.isnan(stats.kurtosis(a, fisher=False, bias=False))
  3303. @pytest.mark.parametrize('axis', [-1, 0, 2, None])
  3304. @pytest.mark.parametrize('bias', [False, True])
  3305. @pytest.mark.parametrize('fisher', [False, True])
  3306. def test_vectorization(self, xp, axis, bias, fisher):
  3307. # Behavior with array input is not tested above. Compare
  3308. # against naive implementation.
  3309. rng = np.random.default_rng(1283413549926)
  3310. x = xp.asarray(rng.random((4, 5, 6)))
  3311. def kurtosis(a, axis, bias, fisher):
  3312. # Simple implementation of kurtosis
  3313. if axis is None:
  3314. a = xp.reshape(a, (-1,))
  3315. axis = 0
  3316. mean = xp.mean(a, axis=axis, keepdims=True)
  3317. mu4 = xp.mean((a - mean)**4, axis=axis)
  3318. mu2 = xp.var(a, axis=axis, correction=0)
  3319. if bias:
  3320. res = mu4 / mu2**2 - 3
  3321. else:
  3322. n = a.shape[axis]
  3323. # https://en.wikipedia.org/wiki/Kurtosis#Standard_unbiased_estimator
  3324. res = (n-1) / ((n-2) * (n-3)) * ((n + 1) * mu4/mu2**2 - 3*(n-1))
  3325. # I know it looks strange to subtract then add 3,
  3326. # but it is simpler than the alternatives
  3327. return res if fisher else res + 3
  3328. res = stats.kurtosis(x, axis=axis, bias=bias, fisher=fisher)
  3329. ref = kurtosis(x, axis=axis, bias=bias, fisher=fisher)
  3330. xp_assert_close(res, ref)
  3331. @hypothesis.strategies.composite
  3332. def ttest_data_axis_strategy(draw):
  3333. # draw an array under shape and value constraints
  3334. elements = dict(allow_nan=False, allow_infinity=False)
  3335. shape = npst.array_shapes(min_dims=1, min_side=2)
  3336. # The test that uses this, `test_pvalue_ci`, uses `float64` to test
  3337. # extreme `alpha`. It could be adjusted to test a dtype-dependent
  3338. # range of `alpha` if this strategy is needed to generate other floats.
  3339. data = draw(npst.arrays(dtype=np.float64, elements=elements, shape=shape))
  3340. # determine axes over which nonzero variance can be computed accurately
  3341. ok_axes = []
  3342. # Locally, I don't need catch_warnings or simplefilter, and I can just
  3343. # suppress RuntimeWarning. I include all that in hope of getting the same
  3344. # behavior on CI.
  3345. with warnings.catch_warnings():
  3346. warnings.simplefilter("error")
  3347. for axis in range(len(data.shape)):
  3348. with contextlib.suppress(Exception):
  3349. var = stats.moment(data, order=2, axis=axis)
  3350. if np.all(var > 0) and np.all(np.isfinite(var)):
  3351. ok_axes.append(axis)
  3352. # if there are no valid axes, tell hypothesis to try a different example
  3353. hypothesis.assume(ok_axes)
  3354. # draw one of the valid axes
  3355. axis = draw(hypothesis.strategies.sampled_from(ok_axes))
  3356. return data, axis
  3357. @make_xp_test_case(stats.ttest_1samp)
  3358. class TestStudentTest:
  3359. # Preserving original test cases.
  3360. # Recomputed statistics and p-values with R t.test, e.g.
  3361. # options(digits=16)
  3362. # t.test(c(-1., 0., 1.), mu=2)
  3363. X1 = [-1., 0., 1.]
  3364. X2 = [0., 1., 2.]
  3365. T1_0 = 0.
  3366. P1_0 = 1.
  3367. T1_1 = -1.7320508075689
  3368. P1_1 = 0.2254033307585
  3369. T1_2 = -3.4641016151378
  3370. P1_2 = 0.07417990022745
  3371. T2_0 = 1.7320508075689
  3372. P2_0 = 0.2254033307585
  3373. P1_1_l = P1_1 / 2
  3374. P1_1_g = 1 - (P1_1 / 2)
  3375. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  3376. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  3377. def test_onesample(self, xp):
  3378. with warnings.catch_warnings(), \
  3379. np.errstate(invalid="ignore", divide="ignore"):
  3380. warnings.filterwarnings(
  3381. "ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
  3382. a = xp.asarray(4.) if not is_numpy(xp) else 4.
  3383. t, p = stats.ttest_1samp(a, 3.)
  3384. xp_assert_equal(t, xp.asarray(xp.nan))
  3385. xp_assert_equal(p, xp.asarray(xp.nan))
  3386. t, p = stats.ttest_1samp(xp.asarray(self.X1), 0.)
  3387. xp_assert_close(t, xp.asarray(self.T1_0))
  3388. xp_assert_close(p, xp.asarray(self.P1_0))
  3389. res = stats.ttest_1samp(xp.asarray(self.X1), 0.)
  3390. attributes = ('statistic', 'pvalue')
  3391. check_named_results(res, attributes, xp=xp)
  3392. t, p = stats.ttest_1samp(xp.asarray(self.X2), 0.)
  3393. xp_assert_close(t, xp.asarray(self.T2_0))
  3394. xp_assert_close(p, xp.asarray(self.P2_0))
  3395. t, p = stats.ttest_1samp(xp.asarray(self.X1), 1.)
  3396. xp_assert_close(t, xp.asarray(self.T1_1))
  3397. xp_assert_close(p, xp.asarray(self.P1_1))
  3398. t, p = stats.ttest_1samp(xp.asarray(self.X1), 2.)
  3399. xp_assert_close(t, xp.asarray(self.T1_2))
  3400. xp_assert_close(p, xp.asarray(self.P1_2))
  3401. def test_onesample_nan_policy_propagate(self, xp):
  3402. x = stats.norm.rvs(loc=5, scale=10, size=51, random_state=7654567)
  3403. x[50] = np.nan
  3404. x = xp.asarray(x, dtype=xp_default_dtype(xp))
  3405. res = stats.ttest_1samp(x, 5.0)
  3406. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  3407. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  3408. @skip_xp_backends(eager_only=True, reason="lazy arrays don't do 'raise'.")
  3409. def test_onesample_nan_policy_omit_raise(self, xp):
  3410. x = stats.norm.rvs(loc=5, scale=10, size=51, random_state=7654567)
  3411. x[50] = np.nan
  3412. x = xp.asarray(x, dtype=xp_default_dtype(xp))
  3413. res = stats.ttest_1samp(x, 5.0, nan_policy='omit')
  3414. xp_assert_close(res.statistic, xp.asarray(-1.6412624074367159))
  3415. xp_assert_close(res.pvalue, xp.asarray(0.107147027334048005))
  3416. with pytest.raises(ValueError, match="The input contains nan values"):
  3417. stats.ttest_1samp(x, 5.0, nan_policy='raise')
  3418. with pytest.raises(ValueError, match="nan_policy must be one of"):
  3419. stats.ttest_1samp(x, 5.0, nan_policy='foobar')
  3420. @pytest.mark.filterwarnings("ignore:divide by zero encountered in divide")
  3421. def test_1samp_alternative(self, xp):
  3422. message = "`alternative` must be 'less', 'greater', or 'two-sided'."
  3423. with pytest.raises(ValueError, match=message):
  3424. stats.ttest_1samp(xp.asarray(self.X1), 0., alternative="error")
  3425. t, p = stats.ttest_1samp(xp.asarray(self.X1), 1., alternative="less")
  3426. xp_assert_close(p, xp.asarray(self.P1_1_l))
  3427. xp_assert_close(t, xp.asarray(self.T1_1))
  3428. t, p = stats.ttest_1samp(xp.asarray(self.X1), 1., alternative="greater")
  3429. xp_assert_close(p, xp.asarray(self.P1_1_g))
  3430. xp_assert_close(t, xp.asarray(self.T1_1))
  3431. @skip_xp_backends('jax.numpy', reason='Generic stdtrit mutates array.')
  3432. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  3433. def test_1samp_ci_1d(self, xp, alternative):
  3434. # test confidence interval method against reference values
  3435. rng = np.random.default_rng(8066178009154342972)
  3436. n = 10
  3437. x = rng.normal(size=n, loc=1.5, scale=2)
  3438. popmean = rng.normal() # this shouldn't affect confidence interval
  3439. # Reference values generated with R t.test:
  3440. # options(digits=16)
  3441. # x = c(2.75532884, 0.93892217, 0.94835861, 1.49489446, -0.62396595,
  3442. # -1.88019867, -1.55684465, 4.88777104, 5.15310979, 4.34656348)
  3443. # t.test(x, conf.level=0.85, alternative='l')
  3444. dtype = xp.asarray(1.0).dtype
  3445. x = xp.asarray(x, dtype=dtype)
  3446. popmean = xp.asarray(popmean, dtype=dtype)
  3447. ref = {'two-sided': [0.3594423211709136, 2.9333455028290860],
  3448. 'greater': [0.7470806207371626, np.inf],
  3449. 'less': [-np.inf, 2.545707203262837]}
  3450. res = stats.ttest_1samp(x, popmean=popmean, alternative=alternative)
  3451. ci = res.confidence_interval(confidence_level=0.85)
  3452. xp_assert_close(ci.low, xp.asarray(ref[alternative][0]))
  3453. xp_assert_close(ci.high, xp.asarray(ref[alternative][1]))
  3454. xp_assert_equal(res.df, xp.asarray(n-1))
  3455. def test_1samp_ci_iv(self, xp):
  3456. # test `confidence_interval` method input validation
  3457. res = stats.ttest_1samp(xp.arange(10.), 0.)
  3458. message = '`confidence_level` must be a number between 0 and 1.'
  3459. with pytest.raises(ValueError, match=message):
  3460. res.confidence_interval(confidence_level=10)
  3461. @skip_xp_backends(np_only=True, reason='Too slow.')
  3462. @pytest.mark.xslow
  3463. @hypothesis.given(alpha=hypothesis.strategies.floats(1e-15, 1-1e-15),
  3464. data_axis=ttest_data_axis_strategy())
  3465. @pytest.mark.parametrize('alternative', ['less', 'greater'])
  3466. def test_pvalue_ci(self, alpha, data_axis, alternative, xp):
  3467. # test relationship between one-sided p-values and confidence intervals
  3468. data, axis = data_axis
  3469. data = xp.asarray(data)
  3470. res = stats.ttest_1samp(data, 0.,
  3471. alternative=alternative, axis=axis)
  3472. l, u = res.confidence_interval(confidence_level=alpha)
  3473. popmean = l if alternative == 'greater' else u
  3474. popmean = xp.expand_dims(popmean, axis=axis)
  3475. res = stats.ttest_1samp(data, popmean, alternative=alternative, axis=axis)
  3476. shape = list(data.shape)
  3477. shape.pop(axis)
  3478. # `float64` is used to correspond with extreme range of `alpha`
  3479. ref = xp.broadcast_to(xp.asarray(1-alpha, dtype=xp.float64), shape)
  3480. xp_assert_close(res.pvalue, ref)
  3481. class TestPercentileOfScore:
  3482. def f(self, *args, **kwargs):
  3483. return stats.percentileofscore(*args, **kwargs)
  3484. @pytest.mark.parametrize("kind, result", [("rank", 40),
  3485. ("mean", 35),
  3486. ("strict", 30),
  3487. ("weak", 40)])
  3488. def test_unique(self, kind, result):
  3489. a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  3490. assert_equal(self.f(a, 4, kind=kind), result)
  3491. @pytest.mark.parametrize("kind, result", [("rank", 45),
  3492. ("mean", 40),
  3493. ("strict", 30),
  3494. ("weak", 50)])
  3495. def test_multiple2(self, kind, result):
  3496. a = [1, 2, 3, 4, 4, 5, 6, 7, 8, 9]
  3497. assert_equal(self.f(a, 4, kind=kind), result)
  3498. @pytest.mark.parametrize("kind, result", [("rank", 50),
  3499. ("mean", 45),
  3500. ("strict", 30),
  3501. ("weak", 60)])
  3502. def test_multiple3(self, kind, result):
  3503. a = [1, 2, 3, 4, 4, 4, 5, 6, 7, 8]
  3504. assert_equal(self.f(a, 4, kind=kind), result)
  3505. @pytest.mark.parametrize("kind, result", [("rank", 30),
  3506. ("mean", 30),
  3507. ("strict", 30),
  3508. ("weak", 30)])
  3509. def test_missing(self, kind, result):
  3510. a = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11]
  3511. assert_equal(self.f(a, 4, kind=kind), result)
  3512. @pytest.mark.parametrize("kind, result", [("rank", 40),
  3513. ("mean", 35),
  3514. ("strict", 30),
  3515. ("weak", 40)])
  3516. def test_large_numbers(self, kind, result):
  3517. a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  3518. assert_equal(self.f(a, 40, kind=kind), result)
  3519. @pytest.mark.parametrize("kind, result", [("rank", 50),
  3520. ("mean", 45),
  3521. ("strict", 30),
  3522. ("weak", 60)])
  3523. def test_large_numbers_multiple3(self, kind, result):
  3524. a = [10, 20, 30, 40, 40, 40, 50, 60, 70, 80]
  3525. assert_equal(self.f(a, 40, kind=kind), result)
  3526. @pytest.mark.parametrize("kind, result", [("rank", 30),
  3527. ("mean", 30),
  3528. ("strict", 30),
  3529. ("weak", 30)])
  3530. def test_large_numbers_missing(self, kind, result):
  3531. a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110]
  3532. assert_equal(self.f(a, 40, kind=kind), result)
  3533. @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100, 100]),
  3534. ("mean", [0, 5, 95, 100]),
  3535. ("strict", [0, 0, 90, 100]),
  3536. ("weak", [0, 10, 100, 100])])
  3537. def test_boundaries(self, kind, result):
  3538. a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110]
  3539. assert_equal(self.f(a, [0, 10, 110, 200], kind=kind), result)
  3540. @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100]),
  3541. ("mean", [0, 5, 95]),
  3542. ("strict", [0, 0, 90]),
  3543. ("weak", [0, 10, 100])])
  3544. def test_inf(self, kind, result):
  3545. a = [1, 2, 3, 4, 5, 6, 7, 8, 9, +np.inf]
  3546. assert_equal(self.f(a, [-np.inf, 1, +np.inf], kind=kind), result)
  3547. cases = [("propagate", [], 1, np.nan),
  3548. ("propagate", [np.nan], 1, np.nan),
  3549. ("propagate", [np.nan], [0, 1, 2], [np.nan, np.nan, np.nan]),
  3550. ("propagate", [1, 2], [1, 2, np.nan], [50, 100, np.nan]),
  3551. ("omit", [1, 2, np.nan], [0, 1, 2], [0, 50, 100]),
  3552. ("omit", [1, 2], [0, 1, np.nan], [0, 50, np.nan]),
  3553. ("omit", [np.nan, np.nan], [0, 1, 2], [np.nan, np.nan, np.nan])]
  3554. @pytest.mark.parametrize("policy, a, score, result", cases)
  3555. def test_nans_ok(self, policy, a, score, result):
  3556. assert_equal(self.f(a, score, nan_policy=policy), result)
  3557. cases = [
  3558. ("raise", [1, 2, 3, np.nan], [1, 2, 3],
  3559. "The input contains nan values"),
  3560. ("raise", [1, 2, 3], [1, 2, 3, np.nan],
  3561. "The input contains nan values"),
  3562. ]
  3563. @pytest.mark.parametrize("policy, a, score, message", cases)
  3564. def test_nans_fail(self, policy, a, score, message):
  3565. with assert_raises(ValueError, match=message):
  3566. self.f(a, score, nan_policy=policy)
  3567. @pytest.mark.parametrize("shape", [
  3568. (6, ),
  3569. (2, 3),
  3570. (2, 1, 3),
  3571. (2, 1, 1, 3),
  3572. ])
  3573. def test_nd(self, shape):
  3574. a = np.array([0, 1, 2, 3, 4, 5])
  3575. scores = a.reshape(shape)
  3576. results = scores*10
  3577. a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  3578. assert_equal(self.f(a, scores, kind="rank"), results)
  3579. def test_multidimensional_error(self):
  3580. # gh-21563 reported that `percentileofscore` accepted multidimensional
  3581. # arrays but did not produce meaningful results.
  3582. message = "`a` must be 1-dimensional."
  3583. with pytest.raises(ValueError, match=message):
  3584. stats.percentileofscore(np.ones((3, 3)), 1)
  3585. PowerDivCase = namedtuple('Case', # type: ignore[name-match]
  3586. ['f_obs', 'f_exp', 'ddof', 'axis',
  3587. 'chi2', # Pearson's
  3588. 'log', # G-test (log-likelihood)
  3589. 'mod_log', # Modified log-likelihood
  3590. 'cr', # Cressie-Read (lambda=2/3)
  3591. ])
  3592. # The details of the first two elements in power_div_1d_cases are used
  3593. # in a test in TestPowerDivergence. Check that code before making
  3594. # any changes here.
  3595. power_div_1d_cases = [
  3596. # Use the default f_exp.
  3597. PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=None, ddof=0, axis=None,
  3598. chi2=4,
  3599. log=2*(4*np.log(4/8) + 12*np.log(12/8)),
  3600. mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)),
  3601. cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)),
  3602. # Give a non-uniform f_exp.
  3603. PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=[2, 16, 12, 2], ddof=0, axis=None,
  3604. chi2=24,
  3605. log=2*(4*np.log(4/2) + 8*np.log(8/16) + 8*np.log(8/2)),
  3606. mod_log=2*(2*np.log(2/4) + 16*np.log(16/8) + 2*np.log(2/8)),
  3607. cr=(4*((4/2)**(2/3) - 1) + 8*((8/16)**(2/3) - 1) +
  3608. 8*((8/2)**(2/3) - 1))/(5/9)),
  3609. # f_exp is a scalar.
  3610. PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=8, ddof=0, axis=None,
  3611. chi2=4,
  3612. log=2*(4*np.log(4/8) + 12*np.log(12/8)),
  3613. mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)),
  3614. cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)),
  3615. # f_exp equal to f_obs.
  3616. PowerDivCase(f_obs=[3, 5, 7, 9], f_exp=[3, 5, 7, 9], ddof=0, axis=0,
  3617. chi2=0, log=0, mod_log=0, cr=0),
  3618. ]
  3619. power_div_empty_cases = [
  3620. # Shape is (0,)--a data set with length 0. The computed
  3621. # test statistic should be 0.
  3622. PowerDivCase(f_obs=[],
  3623. f_exp=None, ddof=0, axis=0,
  3624. chi2=0, log=0, mod_log=0, cr=0),
  3625. # Shape is (0, 3). This is 3 data sets, but each data set has
  3626. # length 0, so the computed test statistic should be [0, 0, 0].
  3627. PowerDivCase(f_obs=np.array([[],[],[]]).T,
  3628. f_exp=None, ddof=0, axis=0,
  3629. chi2=[0, 0, 0],
  3630. log=[0, 0, 0],
  3631. mod_log=[0, 0, 0],
  3632. cr=[0, 0, 0]),
  3633. # Shape is (3, 0). This represents an empty collection of
  3634. # data sets in which each data set has length 3. The test
  3635. # statistic should be an empty array.
  3636. PowerDivCase(f_obs=np.array([[],[],[]]),
  3637. f_exp=None, ddof=0, axis=0,
  3638. chi2=[],
  3639. log=[],
  3640. mod_log=[],
  3641. cr=[]),
  3642. ]
  3643. @make_xp_test_case(stats.power_divergence)
  3644. class TestPowerDivergence:
  3645. def check_power_divergence(self, f_obs, f_exp, ddof, axis, lambda_,
  3646. expected_stat, xp):
  3647. dtype = xp.asarray(1.).dtype
  3648. f_obs = xp.asarray(f_obs, dtype=dtype)
  3649. f_exp = xp.asarray(f_exp, dtype=dtype) if f_exp is not None else f_exp
  3650. if axis is None:
  3651. num_obs = xp_size(f_obs)
  3652. else:
  3653. arrays = (xp.broadcast_arrays(f_obs, f_exp) if f_exp is not None
  3654. else (f_obs,))
  3655. num_obs = arrays[0].shape[axis]
  3656. with warnings.catch_warnings():
  3657. warnings.filterwarnings("ignore", "Mean of empty slice", RuntimeWarning)
  3658. stat, p = stats.power_divergence(
  3659. f_obs, f_exp=f_exp, ddof=ddof,
  3660. axis=axis, lambda_=lambda_)
  3661. xp_assert_close(stat, xp.asarray(expected_stat, dtype=dtype))
  3662. if lambda_ == 1 or lambda_ == "pearson":
  3663. # Also test stats.chisquare.
  3664. stat, p = stats.chisquare(f_obs, f_exp=f_exp, ddof=ddof,
  3665. axis=axis)
  3666. xp_assert_close(stat, xp.asarray(expected_stat, dtype=dtype))
  3667. ddof = np.asarray(ddof)
  3668. expected_p = stats.distributions.chi2.sf(expected_stat,
  3669. num_obs - 1 - ddof)
  3670. xp_assert_close(p, xp.asarray(expected_p, dtype=dtype))
  3671. @pytest.mark.parametrize('case', power_div_1d_cases)
  3672. @pytest.mark.parametrize('lambda_stat',
  3673. [(None, 'chi2'), ('pearson', 'chi2'), (1, 'chi2'),
  3674. ('log-likelihood', 'log'), ('mod-log-likelihood', 'mod_log'),
  3675. ('cressie-read', 'cr'), (2/3, 'cr')])
  3676. def test_basic(self, case, lambda_stat, xp):
  3677. lambda_, attr = lambda_stat
  3678. expected_stat = getattr(case, attr)
  3679. self.check_power_divergence(case.f_obs, case.f_exp, case.ddof, case.axis,
  3680. lambda_, expected_stat, xp)
  3681. def test_axis(self, xp):
  3682. case0 = power_div_1d_cases[0]
  3683. case1 = power_div_1d_cases[1]
  3684. f_obs = np.vstack((case0.f_obs, case1.f_obs))
  3685. f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs),
  3686. case1.f_exp))
  3687. # Check the four computational code paths in power_divergence
  3688. # using a 2D array with axis=1.
  3689. f_obs = xp.asarray(f_obs)
  3690. f_exp = xp.asarray(f_exp) if f_exp is not None else f_exp
  3691. self.check_power_divergence(
  3692. f_obs, f_exp, 0, 1,
  3693. "pearson", [case0.chi2, case1.chi2], xp=xp)
  3694. self.check_power_divergence(
  3695. f_obs, f_exp, 0, 1,
  3696. "log-likelihood", [case0.log, case1.log], xp=xp)
  3697. self.check_power_divergence(
  3698. f_obs, f_exp, 0, 1,
  3699. "mod-log-likelihood", [case0.mod_log, case1.mod_log], xp=xp)
  3700. self.check_power_divergence(
  3701. f_obs, f_exp, 0, 1,
  3702. "cressie-read", [case0.cr, case1.cr], xp=xp)
  3703. # Reshape case0.f_obs to shape (2,2), and use axis=None.
  3704. # The result should be the same.
  3705. f_obs_reshape = xp.reshape(xp.asarray(case0.f_obs), (2, 2))
  3706. self.check_power_divergence(
  3707. f_obs_reshape, None, 0, None,
  3708. "pearson", case0.chi2, xp=xp)
  3709. def test_ddof_broadcasting(self, xp):
  3710. # Test that ddof broadcasts correctly.
  3711. # ddof does not affect the test statistic. It is broadcast
  3712. # with the computed test statistic for the computation of
  3713. # the p value.
  3714. case0 = power_div_1d_cases[0]
  3715. case1 = power_div_1d_cases[1]
  3716. # Create 4x2 arrays of observed and expected frequencies.
  3717. f_obs = np.vstack((case0.f_obs, case1.f_obs)).T
  3718. f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs),
  3719. case1.f_exp)).T
  3720. expected_chi2 = [case0.chi2, case1.chi2]
  3721. dtype = xp.asarray(1.).dtype
  3722. f_obs = xp.asarray(f_obs, dtype=dtype)
  3723. f_exp = xp.asarray(f_exp, dtype=dtype)
  3724. expected_chi2 = xp.asarray(expected_chi2, dtype=dtype)
  3725. # ddof has shape (2, 1). This is broadcast with the computed
  3726. # statistic, so p will have shape (2,2).
  3727. ddof = xp.asarray([[0], [1]])
  3728. stat, p = stats.power_divergence(f_obs, f_exp, ddof=ddof)
  3729. xp_assert_close(stat, expected_chi2)
  3730. # Compute the p values separately, passing in scalars for ddof.
  3731. _, p0 = stats.power_divergence(f_obs, f_exp, ddof=ddof[0, 0])
  3732. _, p1 = stats.power_divergence(f_obs, f_exp, ddof=ddof[1, 0])
  3733. expected_p = xp.concat((p0[xp.newaxis, :], p1[xp.newaxis, :]), axis=0)
  3734. xp_assert_close(p, expected_p)
  3735. @pytest.mark.parametrize('case', power_div_empty_cases)
  3736. @pytest.mark.parametrize('lambda_stat',
  3737. [('pearson', 'chi2'), ('log-likelihood', 'log'),
  3738. ('mod-log-likelihood', 'mod_log'),
  3739. ('cressie-read', 'cr'), (2/3, 'cr')])
  3740. def test_empty_cases(self, case, lambda_stat, xp):
  3741. lambda_, attr = lambda_stat
  3742. expected_stat = getattr(case, attr)
  3743. with warnings.catch_warnings():
  3744. self.check_power_divergence(
  3745. case.f_obs, case.f_exp, case.ddof, case.axis,
  3746. lambda_, expected_stat, xp)
  3747. def test_power_divergence_result_attributes(self, xp):
  3748. f_obs = power_div_1d_cases[0].f_obs
  3749. f_exp = power_div_1d_cases[0].f_exp
  3750. ddof = power_div_1d_cases[0].ddof
  3751. axis = power_div_1d_cases[0].axis
  3752. dtype = xp.asarray(1.).dtype
  3753. f_obs = xp.asarray(f_obs, dtype=dtype)
  3754. # f_exp is None
  3755. res = stats.power_divergence(f_obs, f_exp=f_exp, ddof=ddof,
  3756. axis=axis, lambda_="pearson")
  3757. attributes = ('statistic', 'pvalue')
  3758. check_named_results(res, attributes, xp=xp)
  3759. def test_power_divergence_gh_12282(self, xp):
  3760. # The sums of observed and expected frequencies must match
  3761. f_obs = xp.asarray([[10., 20.], [30., 20.]])
  3762. f_exp = xp.asarray([[5., 15.], [35., 25.]])
  3763. message = 'For each axis slice...'
  3764. with pytest.raises(ValueError, match=message):
  3765. stats.power_divergence(f_obs, f_exp=xp.asarray([30., 60.]))
  3766. with pytest.raises(ValueError, match=message):
  3767. stats.power_divergence(f_obs, f_exp=f_exp, axis=1)
  3768. stat, pval = stats.power_divergence(f_obs, f_exp=f_exp)
  3769. xp_assert_close(stat, xp.asarray([5.71428571, 2.66666667]))
  3770. xp_assert_close(pval, xp.asarray([0.01682741, 0.10247043]))
  3771. def test_power_divergence_against_cressie_read_data(self, xp):
  3772. # Test stats.power_divergence against tables 4 and 5 from
  3773. # Cressie and Read, "Multimonial Goodness-of-Fit Tests",
  3774. # J. R. Statist. Soc. B (1984), Vol 46, No. 3, pp. 440-464.
  3775. # This tests the calculation for several values of lambda.
  3776. # Table 4 data recalculated for greater precision according to:
  3777. # Shelby J. Haberman, Analysis of Qualitative Data: Volume 1
  3778. # Introductory Topics, Academic Press, New York, USA (1978).
  3779. obs = xp.asarray([15., 11., 14., 17., 5., 11., 10., 4., 8.,
  3780. 10., 7., 9., 11., 3., 6., 1., 1., 4.])
  3781. beta = -0.083769 # Haberman (1978), p. 15
  3782. i = xp.arange(1., obs.shape[0] + 1.)
  3783. alpha = xp.log(xp.sum(obs) / xp.sum(xp.exp(beta*i)))
  3784. expected_counts = xp.exp(alpha + beta*i)
  3785. # `table4` holds just the second and third columns from Table 4.
  3786. table4 = xp.concat((obs[xp.newaxis, :],
  3787. expected_counts[xp.newaxis, :])).T
  3788. table5 = xp.asarray([
  3789. # lambda, statistic
  3790. -10.0, 72.2e3,
  3791. -5.0, 28.9e1,
  3792. -3.0, 65.6,
  3793. -2.0, 40.6,
  3794. -1.5, 34.0,
  3795. -1.0, 29.5,
  3796. -0.5, 26.5,
  3797. 0.0, 24.6,
  3798. 0.5, 23.4,
  3799. 0.67, 23.1,
  3800. 1.0, 22.7,
  3801. 1.5, 22.6,
  3802. 2.0, 22.9,
  3803. 3.0, 24.8,
  3804. 5.0, 35.5,
  3805. 10.0, 21.4e1,
  3806. ])
  3807. table5 = xp.reshape(table5, (-1, 2))
  3808. for i in range(table5.shape[0]):
  3809. lambda_, expected_stat = table5[i, 0], table5[i, 1]
  3810. stat, p = stats.power_divergence(table4[:,0], table4[:,1],
  3811. lambda_=lambda_)
  3812. xp_assert_close(stat, expected_stat, rtol=5e-3)
  3813. @make_xp_test_case(stats.chisquare)
  3814. class TestChisquare:
  3815. def test_chisquare_12282a(self, xp):
  3816. # Currently `chisquare` is implemented via power_divergence
  3817. # in case that ever changes, perform a basic test like
  3818. # test_power_divergence_gh_12282
  3819. with assert_raises(ValueError, match='For each axis slice...'):
  3820. f_obs = xp.asarray([10., 20.])
  3821. f_exp = xp.asarray([30., 60.])
  3822. stats.chisquare(f_obs, f_exp=f_exp)
  3823. def test_chisquare_12282b(self, xp):
  3824. # Check that users can now disable the sum check tested in
  3825. # test_chisquare_12282a. Also, confirm that statistic and p-value
  3826. # are as expected.
  3827. rng = np.random.default_rng(3843874358728234)
  3828. n = 10
  3829. lam = rng.uniform(1000, 2000, size=n)
  3830. x = rng.poisson(lam)
  3831. lam = xp.asarray(lam)
  3832. x = xp.asarray(x, dtype=lam.dtype)
  3833. res = stats.chisquare(x, f_exp=lam, ddof=-1, sum_check=False)
  3834. # Poisson is approximately normal with mean and variance lam
  3835. z = (x - lam) / xp.sqrt(lam)
  3836. statistic = xp.sum(z**2)
  3837. xp_assert_close(res.statistic, statistic)
  3838. # Sum of `n` squared standard normal variables follows chi2 with `n` DoF
  3839. X2 = _SimpleChi2(xp.asarray(n, dtype=statistic.dtype))
  3840. xp_assert_close(res.pvalue, X2.sf(statistic))
  3841. @pytest.mark.parametrize("n, dtype", [(200, 'uint8'), (1000000, 'int32')])
  3842. def test_chiquare_data_types_attributes(self, n, dtype, xp):
  3843. # Regression test for gh-10159 and gh-18368
  3844. dtype = getattr(xp, dtype)
  3845. obs = xp.asarray([n, 0], dtype=dtype)
  3846. exp = xp.asarray([n // 2, n // 2], dtype=dtype)
  3847. res = stats.chisquare(obs, exp)
  3848. stat, p = res
  3849. xp_assert_close(stat, xp.asarray(n, dtype=xp.asarray(1.).dtype), rtol=1e-13)
  3850. # check that attributes are identical to unpacked outputs - see gh-18368
  3851. xp_assert_equal(res.statistic, stat)
  3852. xp_assert_equal(res.pvalue, p)
  3853. @make_xp_test_case(stats.friedmanchisquare)
  3854. class TestFriedmanChiSquare:
  3855. # verified with matlab and R
  3856. # From Demsar "Statistical Comparisons of Classifiers over Multiple Data Sets"
  3857. # 2006, Xf=9.28 (no tie handling, tie corrected Xf >=9.28)
  3858. x1 = [[0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661,
  3859. 0.583, 0.775, 1.0, 0.94, 0.619, 0.972, 0.957],
  3860. [0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668,
  3861. 0.583, 0.838, 1.0, 0.962, 0.666, 0.981, 0.978],
  3862. [0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609,
  3863. 0.563, 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946],
  3864. [0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685,
  3865. 0.625, 0.875, 1.0, 0.962, 0.669, 0.975, 0.970]]
  3866. ref1 = (10.2283464566929, 0.0167215803284414)
  3867. # From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
  3868. x2 = [[4, 3, 5, 3, 5, 3, 2, 5, 4, 4, 4, 3],
  3869. [2, 2, 1, 2, 3, 1, 2, 3, 2, 1, 1, 3],
  3870. [2, 4, 3, 3, 4, 3, 3, 4, 4, 1, 2, 1],
  3871. [3, 5, 4, 3, 4, 4, 3, 3, 3, 4, 4, 4]]
  3872. ref2 = (18.9428571428571, 0.000280938375189499)
  3873. # From Jerrorl H. Zar, "Biostatistical Analysis"(example 12.6),
  3874. # Xf=10.68, 0.005 < p < 0.01:
  3875. # Probability from this example is inexact
  3876. # using Chisquare approximation of Friedman Chisquare.
  3877. x3 = [[7.0, 9.9, 8.5, 5.1, 10.3],
  3878. [5.3, 5.7, 4.7, 3.5, 7.7],
  3879. [4.9, 7.6, 5.5, 2.8, 8.4],
  3880. [8.8, 8.9, 8.1, 3.3, 9.1]]
  3881. ref3 = (10.68, 0.0135882729582176)
  3882. @pytest.mark.parametrize("dtype", [None, "float32", "float64"])
  3883. @pytest.mark.parametrize("data, ref", [(x1, ref1), (x2, ref2), (x3, ref3)])
  3884. def test_against_references(self, dtype, data, ref, xp):
  3885. if is_numpy(xp) and xp.__version__ < "2.0" and dtype=='float32':
  3886. pytest.skip("NumPy doesn't preserve dtype pre-NEP 50.")
  3887. dtype = dtype if dtype is None else getattr(xp, dtype)
  3888. data = [xp.asarray(array, dtype=dtype) for array in data]
  3889. res = stats.friedmanchisquare(*data)
  3890. xp_assert_close(res.statistic, xp.asarray(ref[0], dtype=dtype))
  3891. xp_assert_close(res.pvalue, xp.asarray(ref[1], dtype=dtype))
  3892. def test_too_few_samples(self, xp):
  3893. message = "At least 3 samples must be given"
  3894. with pytest.raises(ValueError, match=message):
  3895. stats.friedmanchisquare(xp.asarray(self.x3[0]), xp.asarray(self.x3[1]))
  3896. class TestKSTest:
  3897. """Tests kstest and ks_1samp agree with K-S various sizes, alternatives, modes."""
  3898. def _testOne(self, x, alternative, expected_statistic, expected_prob,
  3899. mode='auto', decimal=14):
  3900. result = stats.kstest(x, 'norm', alternative=alternative, mode=mode)
  3901. expected = np.array([expected_statistic, expected_prob])
  3902. assert_array_almost_equal(np.array(result), expected, decimal=decimal)
  3903. def _test_kstest_and_ks1samp(self, x, alternative, mode='auto', decimal=14):
  3904. result = stats.kstest(x, 'norm', alternative=alternative, mode=mode)
  3905. result_1samp = stats.ks_1samp(x, stats.norm.cdf,
  3906. alternative=alternative, mode=mode)
  3907. assert_array_almost_equal(np.array(result), result_1samp, decimal=decimal)
  3908. def test_namedtuple_attributes(self):
  3909. x = np.linspace(-1, 1, 9)
  3910. # test for namedtuple attribute results
  3911. attributes = ('statistic', 'pvalue')
  3912. res = stats.kstest(x, 'norm')
  3913. check_named_results(res, attributes)
  3914. def test_agree_with_ks_1samp(self):
  3915. x = np.linspace(-1, 1, 9)
  3916. self._test_kstest_and_ks1samp(x, 'two-sided')
  3917. x = np.linspace(-15, 15, 9)
  3918. self._test_kstest_and_ks1samp(x, 'two-sided')
  3919. x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99]
  3920. self._test_kstest_and_ks1samp(x, 'two-sided')
  3921. self._test_kstest_and_ks1samp(x, 'greater', mode='exact')
  3922. self._test_kstest_and_ks1samp(x, 'less', mode='exact')
  3923. def test_pm_inf_gh20386(self):
  3924. # Check that gh-20386 is resolved - `kstest` does not
  3925. # return NaNs when both -inf and inf are in sample.
  3926. vals = [-np.inf, 0, 1, np.inf]
  3927. res = stats.kstest(vals, stats.cauchy.cdf)
  3928. ref = stats.kstest(vals, stats.cauchy.cdf, _no_deco=True)
  3929. assert np.all(np.isfinite(res))
  3930. assert_equal(res, ref)
  3931. assert not np.isnan(res.statistic)
  3932. assert not np.isnan(res.pvalue)
  3933. # missing: no test that uses *args
  3934. @make_xp_test_case(stats.ks_1samp)
  3935. class TestKSOneSample:
  3936. """
  3937. Tests kstest and ks_samp 1-samples with K-S various sizes, alternatives, modes.
  3938. """
  3939. def _testOne(self, x, alternative, expected_statistic, expected_prob, *,
  3940. mode='auto', dtype, xp):
  3941. rtol = 5e-14 if dtype == xp.float64 else 1e-5
  3942. res = stats.ks_1samp(x, special.ndtr, alternative=alternative, mode=mode)
  3943. ref_statistic = xp.asarray(expected_statistic, dtype=dtype)
  3944. ref_pvalue = xp.asarray(expected_prob, dtype=dtype)
  3945. xp_assert_close(res.statistic, ref_statistic, rtol=rtol)
  3946. xp_assert_close(res.pvalue, ref_pvalue, rtol=rtol)
  3947. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  3948. def test_agree_with_r(self, dtype, xp):
  3949. # comparing with some values from R
  3950. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  3951. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  3952. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  3953. x = xp.linspace(-1, 1, 9, dtype=dtype)
  3954. self._testOne(x, 'two-sided', 0.15865525393145705, 0.95164069201518386,
  3955. dtype=dtype, xp=xp)
  3956. x = xp.linspace(-15, 15, 9, dtype=dtype)
  3957. self._testOne(x, 'two-sided', 0.44435602715924361, 0.038850140086788665,
  3958. dtype=dtype, xp=xp)
  3959. x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99]
  3960. x = xp.asarray(x, dtype=dtype)
  3961. self._testOne(x, 'two-sided', 0.293580126801961, 0.293408463684361,
  3962. dtype=dtype, xp=xp)
  3963. self._testOne(x, 'greater', 0.293580126801961, 0.146988835042376, mode='exact',
  3964. dtype=dtype, xp=xp)
  3965. self._testOne(x, 'less', 0.109348552425692, 0.732768892470675, mode='exact',
  3966. dtype=dtype, xp=xp)
  3967. @pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
  3968. def test_known_examples(self, xp, dtype):
  3969. # the following tests rely on deterministically replicated rvs
  3970. if is_numpy(xp) and xp.__version__ < "2.0" and dtype == 'float32':
  3971. pytest.skip("Pre-NEP 50 doesn't respect dtypes")
  3972. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  3973. x = stats.norm.rvs(loc=0.2, size=100, random_state=987654321)
  3974. x = xp.asarray(x, dtype=dtype)
  3975. self._testOne(x, 'two-sided', 0.12464329735846891, 0.089444888711820769,
  3976. mode='asymp', xp=xp, dtype=dtype)
  3977. self._testOne(x, 'less', 0.12464329735846891, 0.040989164077641749,
  3978. xp=xp, dtype=dtype)
  3979. self._testOne(x, 'greater', 0.0072115233216310994, 0.98531158590396228,
  3980. xp=xp, dtype=dtype)
  3981. # this is a test of the exact p-value calculation, available only with NumPy.
  3982. def test_ks1samp_allpaths(self):
  3983. # Check NaN input, output.
  3984. assert_(np.isnan(kolmogn(np.nan, 1, True)))
  3985. with assert_raises(ValueError, match='n is not integral: 1.5'):
  3986. kolmogn(1.5, 1, True)
  3987. assert_(np.isnan(kolmogn(-1, 1, True)))
  3988. dataset = np.asarray([
  3989. # Check x out of range
  3990. (101, 1, True, 1.0),
  3991. (101, 1.1, True, 1.0),
  3992. (101, 0, True, 0.0),
  3993. (101, -0.1, True, 0.0),
  3994. (32, 1.0 / 64, True, 0.0), # Ruben-Gambino
  3995. (32, 1.0 / 64, False, 1.0), # Ruben-Gambino
  3996. # Miller
  3997. (32, 0.5, True, 0.9999999363163307),
  3998. # Miller 2 * special.smirnov(32, 0.5)
  3999. (32, 0.5, False, 6.368366937916623e-08),
  4000. # Check some other paths
  4001. (32, 1.0 / 8, True, 0.34624229979775223),
  4002. (32, 1.0 / 4, True, 0.9699508336558085),
  4003. (1600, 0.49, False, 0.0),
  4004. # 2 * special.smirnov(1600, 1/16.0)
  4005. (1600, 1 / 16.0, False, 7.0837876229702195e-06),
  4006. # _kolmogn_DMTW
  4007. (1600, 14 / 1600, False, 0.99962357317602),
  4008. # _kolmogn_PelzGood
  4009. (1600, 1 / 32, False, 0.08603386296651416),
  4010. ])
  4011. FuncData(kolmogn, dataset, (0, 1, 2), 3).check(dtypes=[int, float, bool])
  4012. @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_1samp])
  4013. @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign",
  4014. [('greater', 6., 6., +1),
  4015. ('less', 7., 7., -1),
  4016. ('two-sided', 6., 6., +1),
  4017. ('two-sided', 7., 7., -1)])
  4018. def test_location_sign(self, ksfunc, alternative,
  4019. x6val, ref_location, ref_sign, xp):
  4020. # Test that location and sign corresponding with statistic are as
  4021. # expected. (Test is designed to be easy to predict.)
  4022. x = xp.arange(10.) + 0.5
  4023. x = xpx.at(x)[6].set(x6val)
  4024. # cdf = stats.uniform(scale=10).cdf
  4025. def cdf(x): return x / 10.
  4026. res = ksfunc(xp.asarray(x), cdf, alternative=alternative)
  4027. rtol = 1e-15 if x.dtype == xp.float64 else 1e-6
  4028. xp_assert_close(res.statistic, xp.asarray(0.1), rtol=rtol)
  4029. xp_assert_equal(res.statistic_location, xp.asarray(ref_location))
  4030. xp_assert_equal(res.statistic_sign, xp.asarray(ref_sign, dtype=xp.int8))
  4031. # missing: no test that uses *args
  4032. class TestKSTwoSamples:
  4033. """Tests 2-samples with K-S various sizes, alternatives, modes."""
  4034. def _testOne(self, x1, x2, alternative, expected_statistic, expected_prob,
  4035. mode='auto'):
  4036. result = stats.ks_2samp(x1, x2, alternative, mode=mode)
  4037. expected = np.array([expected_statistic, expected_prob])
  4038. assert_array_almost_equal(np.array(result), expected)
  4039. def testSmall(self):
  4040. self._testOne([0], [1], 'two-sided', 1.0/1, 1.0)
  4041. self._testOne([0], [1], 'greater', 1.0/1, 0.5)
  4042. self._testOne([0], [1], 'less', 0.0/1, 1.0)
  4043. self._testOne([1], [0], 'two-sided', 1.0/1, 1.0)
  4044. self._testOne([1], [0], 'greater', 0.0/1, 1.0)
  4045. self._testOne([1], [0], 'less', 1.0/1, 0.5)
  4046. def testTwoVsThree(self):
  4047. data1 = np.array([1.0, 2.0])
  4048. data1p = data1 + 0.01
  4049. data1m = data1 - 0.01
  4050. data2 = np.array([1.0, 2.0, 3.0])
  4051. self._testOne(data1p, data2, 'two-sided', 1.0 / 3, 1.0)
  4052. self._testOne(data1p, data2, 'greater', 1.0 / 3, 0.7)
  4053. self._testOne(data1p, data2, 'less', 1.0 / 3, 0.7)
  4054. self._testOne(data1m, data2, 'two-sided', 2.0 / 3, 0.6)
  4055. self._testOne(data1m, data2, 'greater', 2.0 / 3, 0.3)
  4056. self._testOne(data1m, data2, 'less', 0, 1.0)
  4057. def testTwoVsFour(self):
  4058. data1 = np.array([1.0, 2.0])
  4059. data1p = data1 + 0.01
  4060. data1m = data1 - 0.01
  4061. data2 = np.array([1.0, 2.0, 3.0, 4.0])
  4062. self._testOne(data1p, data2, 'two-sided', 2.0 / 4, 14.0/15)
  4063. self._testOne(data1p, data2, 'greater', 2.0 / 4, 8.0/15)
  4064. self._testOne(data1p, data2, 'less', 1.0 / 4, 12.0/15)
  4065. self._testOne(data1m, data2, 'two-sided', 3.0 / 4, 6.0/15)
  4066. self._testOne(data1m, data2, 'greater', 3.0 / 4, 3.0/15)
  4067. self._testOne(data1m, data2, 'less', 0, 1.0)
  4068. def test100_100(self):
  4069. x100 = np.linspace(1, 100, 100)
  4070. x100_2_p1 = x100 + 2 + 0.1
  4071. x100_2_m1 = x100 + 2 - 0.1
  4072. self._testOne(x100, x100_2_p1, 'two-sided', 3.0 / 100, 0.9999999999962055)
  4073. self._testOne(x100, x100_2_p1, 'greater', 3.0 / 100, 0.9143290114276248)
  4074. self._testOne(x100, x100_2_p1, 'less', 0, 1.0)
  4075. self._testOne(x100, x100_2_m1, 'two-sided', 2.0 / 100, 1.0)
  4076. self._testOne(x100, x100_2_m1, 'greater', 2.0 / 100, 0.960978450786184)
  4077. self._testOne(x100, x100_2_m1, 'less', 0, 1.0)
  4078. def test100_110(self):
  4079. x100 = np.linspace(1, 100, 100)
  4080. x110 = np.linspace(1, 100, 110)
  4081. x110_20_p1 = x110 + 20 + 0.1
  4082. x110_20_m1 = x110 + 20 - 0.1
  4083. # 100, 110
  4084. self._testOne(x100, x110_20_p1, 'two-sided', 232.0 / 1100, 0.015739183865607353)
  4085. self._testOne(x100, x110_20_p1, 'greater', 232.0 / 1100, 0.007869594319053203)
  4086. self._testOne(x100, x110_20_p1, 'less', 0, 1)
  4087. self._testOne(x100, x110_20_m1, 'two-sided', 229.0 / 1100, 0.017803803861026313)
  4088. self._testOne(x100, x110_20_m1, 'greater', 229.0 / 1100, 0.008901905958245056)
  4089. self._testOne(x100, x110_20_m1, 'less', 0.0, 1.0)
  4090. def testRepeatedValues(self):
  4091. x2233 = np.array([2] * 3 + [3] * 4 + [5] * 5 + [6] * 4, dtype=int)
  4092. x3344 = x2233 + 1
  4093. x2356 = np.array([2] * 3 + [3] * 4 + [5] * 10 + [6] * 4, dtype=int)
  4094. x3467 = np.array([3] * 10 + [4] * 2 + [6] * 10 + [7] * 4, dtype=int)
  4095. self._testOne(x2233, x3344, 'two-sided', 5.0/16, 0.4262934613454952)
  4096. self._testOne(x2233, x3344, 'greater', 5.0/16, 0.21465428276573786)
  4097. self._testOne(x2233, x3344, 'less', 0.0/16, 1.0)
  4098. self._testOne(x2356, x3467, 'two-sided', 190.0/21/26, 0.0919245790168125)
  4099. self._testOne(x2356, x3467, 'greater', 190.0/21/26, 0.0459633806858544)
  4100. self._testOne(x2356, x3467, 'less', 70.0/21/26, 0.6121593130022775)
  4101. def testEqualSizes(self):
  4102. data2 = np.array([1.0, 2.0, 3.0])
  4103. self._testOne(data2, data2+1, 'two-sided', 1.0/3, 1.0)
  4104. self._testOne(data2, data2+1, 'greater', 1.0/3, 0.75)
  4105. self._testOne(data2, data2+1, 'less', 0.0/3, 1.)
  4106. self._testOne(data2, data2+0.5, 'two-sided', 1.0/3, 1.0)
  4107. self._testOne(data2, data2+0.5, 'greater', 1.0/3, 0.75)
  4108. self._testOne(data2, data2+0.5, 'less', 0.0/3, 1.)
  4109. self._testOne(data2, data2-0.5, 'two-sided', 1.0/3, 1.0)
  4110. self._testOne(data2, data2-0.5, 'greater', 0.0/3, 1.0)
  4111. self._testOne(data2, data2-0.5, 'less', 1.0/3, 0.75)
  4112. @pytest.mark.slow
  4113. def testMiddlingBoth(self):
  4114. # 500, 600
  4115. n1, n2 = 500, 600
  4116. delta = 1.0/n1/n2/2/2
  4117. x = np.linspace(1, 200, n1) - delta
  4118. y = np.linspace(2, 200, n2)
  4119. self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0,
  4120. mode='auto')
  4121. self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0,
  4122. mode='asymp')
  4123. self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929,
  4124. mode='asymp')
  4125. self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021,
  4126. mode='asymp')
  4127. with warnings.catch_warnings():
  4128. message = "ks_2samp: Exact calculation unsuccessful."
  4129. warnings.filterwarnings("ignore", message, RuntimeWarning)
  4130. self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929,
  4131. mode='exact')
  4132. self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021,
  4133. mode='exact')
  4134. with warnings.catch_warnings(record=True) as w:
  4135. warnings.simplefilter("always")
  4136. self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021,
  4137. mode='exact')
  4138. _check_warnings(w, RuntimeWarning, 1)
  4139. @pytest.mark.slow
  4140. def testMediumBoth(self):
  4141. # 1000, 1100
  4142. n1, n2 = 1000, 1100
  4143. delta = 1.0/n1/n2/2/2
  4144. x = np.linspace(1, 200, n1) - delta
  4145. y = np.linspace(2, 200, n2)
  4146. self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0,
  4147. mode='asymp')
  4148. self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0,
  4149. mode='auto')
  4150. self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622,
  4151. mode='asymp')
  4152. self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984,
  4153. mode='asymp')
  4154. with warnings.catch_warnings():
  4155. message = "ks_2samp: Exact calculation unsuccessful."
  4156. warnings.filterwarnings("ignore", message, RuntimeWarning)
  4157. self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622,
  4158. mode='exact')
  4159. self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984,
  4160. mode='exact')
  4161. with warnings.catch_warnings(record=True) as w:
  4162. warnings.simplefilter("always")
  4163. self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984,
  4164. mode='exact')
  4165. _check_warnings(w, RuntimeWarning, 1)
  4166. def testLarge(self):
  4167. # 10000, 110
  4168. n1, n2 = 10000, 110
  4169. lcm = n1*11.0
  4170. delta = 1.0/n1/n2/2/2
  4171. x = np.linspace(1, 200, n1) - delta
  4172. y = np.linspace(2, 100, n2)
  4173. self._testOne(x, y, 'two-sided', 55275.0 / lcm, 4.2188474935755949e-15)
  4174. self._testOne(x, y, 'greater', 561.0 / lcm, 0.99115454582047591)
  4175. self._testOne(x, y, 'less', 55275.0 / lcm, 3.1317328311518713e-26)
  4176. def test_gh11184(self):
  4177. # 3000, 3001, exact two-sided
  4178. rng = np.random.RandomState(123456)
  4179. x = rng.normal(size=3000)
  4180. y = rng.normal(size=3001) * 1.5
  4181. self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15,
  4182. mode='asymp')
  4183. self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15,
  4184. mode='exact')
  4185. @pytest.mark.xslow
  4186. def test_gh11184_bigger(self):
  4187. # 10000, 10001, exact two-sided
  4188. rng = np.random.RandomState(123456)
  4189. x = rng.normal(size=10000)
  4190. y = rng.normal(size=10001) * 1.5
  4191. self._testOne(x, y, 'two-sided', 0.10597913208679133, 3.3149311398483503e-49,
  4192. mode='asymp')
  4193. self._testOne(x, y, 'two-sided', 0.10597913208679133, 2.7755575615628914e-15,
  4194. mode='exact')
  4195. self._testOne(x, y, 'greater', 0.10597913208679133, 2.7947433906389253e-41,
  4196. mode='asymp')
  4197. self._testOne(x, y, 'less', 0.09658002199780022, 2.7947433906389253e-41,
  4198. mode='asymp')
  4199. @pytest.mark.xslow
  4200. def test_gh12999(self):
  4201. rng = np.random.RandomState(123456)
  4202. for x in range(1000, 12000, 1000):
  4203. vals1 = rng.normal(size=(x))
  4204. vals2 = rng.normal(size=(x + 10), loc=0.5)
  4205. exact = stats.ks_2samp(vals1, vals2, mode='exact').pvalue
  4206. asymp = stats.ks_2samp(vals1, vals2, mode='asymp').pvalue
  4207. # these two p-values should be in line with each other
  4208. assert_array_less(exact, 3 * asymp)
  4209. assert_array_less(asymp, 3 * exact)
  4210. @pytest.mark.slow
  4211. def testLargeBoth(self):
  4212. # 10000, 11000
  4213. n1, n2 = 10000, 11000
  4214. lcm = n1*11.0
  4215. delta = 1.0/n1/n2/2/2
  4216. x = np.linspace(1, 200, n1) - delta
  4217. y = np.linspace(2, 200, n2)
  4218. self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576,
  4219. mode='asymp')
  4220. self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990456491488628,
  4221. mode='exact')
  4222. self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576,
  4223. mode='auto')
  4224. self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673)
  4225. self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724)
  4226. with warnings.catch_warnings():
  4227. message = "ks_2samp: Exact calculation unsuccessful."
  4228. warnings.filterwarnings("ignore", message, RuntimeWarning)
  4229. self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673,
  4230. mode='exact')
  4231. self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724,
  4232. mode='exact')
  4233. def testNamedAttributes(self):
  4234. # test for namedtuple attribute results
  4235. attributes = ('statistic', 'pvalue')
  4236. res = stats.ks_2samp([1, 2], [3])
  4237. check_named_results(res, attributes)
  4238. @pytest.mark.slow
  4239. def test_some_code_paths(self):
  4240. # Check that some code paths are executed
  4241. from scipy.stats._stats_py import (
  4242. _count_paths_outside_method,
  4243. _compute_outer_prob_inside_method
  4244. )
  4245. _compute_outer_prob_inside_method(1, 1, 1, 1)
  4246. _count_paths_outside_method(1000, 1, 1, 1001)
  4247. with np.errstate(invalid='raise'):
  4248. assert_raises(FloatingPointError, _count_paths_outside_method,
  4249. 1100, 1099, 1, 1)
  4250. assert_raises(FloatingPointError, _count_paths_outside_method,
  4251. 2000, 1000, 1, 1)
  4252. @pytest.mark.parametrize('case', (([], [1]), ([1], []), ([], [])))
  4253. def test_argument_checking(self, case):
  4254. # Check that an empty array warns
  4255. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  4256. res = stats.ks_2samp(*case)
  4257. assert_equal(res.statistic, np.nan)
  4258. assert_equal(res.pvalue, np.nan)
  4259. @pytest.mark.xslow
  4260. def test_gh12218(self):
  4261. """Ensure gh-12218 is fixed."""
  4262. # gh-1228 triggered a TypeError calculating sqrt(n1*n2*(n1+n2)).
  4263. # n1, n2 both large integers, the product exceeded 2^64
  4264. rng = np.random.default_rng(8751495592)
  4265. n1 = 2097152 # 2*^21
  4266. rvs1 = stats.uniform.rvs(size=n1, loc=0., scale=1, random_state=rng)
  4267. rvs2 = rvs1 + 1 # Exact value of rvs2 doesn't matter.
  4268. stats.ks_2samp(rvs1, rvs2, alternative='greater', mode='asymp')
  4269. stats.ks_2samp(rvs1, rvs2, alternative='less', mode='asymp')
  4270. stats.ks_2samp(rvs1, rvs2, alternative='two-sided', mode='asymp')
  4271. def test_warnings_gh_14019(self):
  4272. # Check that RuntimeWarning is raised when method='auto' and exact
  4273. # p-value calculation fails. See gh-14019.
  4274. rng = np.random.RandomState(seed=23493549)
  4275. # random samples of the same size as in the issue
  4276. data1 = rng.random(size=881) + 0.5
  4277. data2 = rng.random(size=369)
  4278. message = "ks_2samp: Exact calculation unsuccessful"
  4279. with pytest.warns(RuntimeWarning, match=message):
  4280. res = stats.ks_2samp(data1, data2, alternative='less')
  4281. assert_allclose(res.pvalue, 0, atol=1e-14)
  4282. @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_2samp])
  4283. @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign",
  4284. [('greater', 5.9, 5.9, +1),
  4285. ('less', 6.1, 6.0, -1),
  4286. ('two-sided', 5.9, 5.9, +1),
  4287. ('two-sided', 6.1, 6.0, -1)])
  4288. def test_location_sign(self, ksfunc, alternative,
  4289. x6val, ref_location, ref_sign):
  4290. # Test that location and sign corresponding with statistic are as
  4291. # expected. (Test is designed to be easy to predict.)
  4292. x = np.arange(10, dtype=np.float64)
  4293. y = x.copy()
  4294. x[6] = x6val
  4295. res = stats.ks_2samp(x, y, alternative=alternative)
  4296. assert res.statistic == 0.1
  4297. assert res.statistic_location == ref_location
  4298. assert res.statistic_sign == ref_sign
  4299. def test_ttest_rel():
  4300. # regression test
  4301. tr,pr = 0.81248591389165692, 0.41846234511362157
  4302. tpr = ([tr,-tr],[pr,pr])
  4303. rvs1 = np.linspace(1,100,100)
  4304. rvs2 = np.linspace(1.01,99.989,100)
  4305. rvs1_2D = np.array([np.linspace(1,100,100), np.linspace(1.01,99.989,100)])
  4306. rvs2_2D = np.array([np.linspace(1.01,99.989,100), np.linspace(1,100,100)])
  4307. t,p = stats.ttest_rel(rvs1, rvs2, axis=0)
  4308. assert_array_almost_equal([t,p],(tr,pr))
  4309. t,p = stats.ttest_rel(rvs1_2D.T, rvs2_2D.T, axis=0)
  4310. assert_array_almost_equal([t,p],tpr)
  4311. t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1)
  4312. assert_array_almost_equal([t,p],tpr)
  4313. # test scalars
  4314. with warnings.catch_warnings(), \
  4315. np.errstate(invalid="ignore", divide="ignore"):
  4316. warnings.filterwarnings(
  4317. "ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
  4318. t, p = stats.ttest_rel(4., 3.)
  4319. assert_(np.isnan(t))
  4320. assert_(np.isnan(p))
  4321. # test for namedtuple attribute results
  4322. attributes = ('statistic', 'pvalue')
  4323. res = stats.ttest_rel(rvs1, rvs2, axis=0)
  4324. check_named_results(res, attributes)
  4325. # test on 3 dimensions
  4326. rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
  4327. rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
  4328. t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1)
  4329. assert_array_almost_equal(np.abs(t), tr)
  4330. assert_array_almost_equal(np.abs(p), pr)
  4331. assert_equal(t.shape, (2, 3))
  4332. t, p = stats.ttest_rel(np.moveaxis(rvs1_3D, 2, 0),
  4333. np.moveaxis(rvs2_3D, 2, 0),
  4334. axis=2)
  4335. assert_array_almost_equal(np.abs(t), tr)
  4336. assert_array_almost_equal(np.abs(p), pr)
  4337. assert_equal(t.shape, (3, 2))
  4338. # test alternative parameter
  4339. assert_raises(ValueError, stats.ttest_rel, rvs1, rvs2, alternative="error")
  4340. t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="less")
  4341. assert_allclose(p, 1 - pr/2)
  4342. assert_allclose(t, tr)
  4343. t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="greater")
  4344. assert_allclose(p, pr/2)
  4345. assert_allclose(t, tr)
  4346. # check nan policy
  4347. rng = np.random.RandomState(12345678)
  4348. x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng)
  4349. x[500] = np.nan
  4350. y = (stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng) +
  4351. stats.norm.rvs(scale=0.2, size=501, random_state=rng))
  4352. y[500] = np.nan
  4353. with np.errstate(invalid="ignore"):
  4354. assert_array_equal(stats.ttest_rel(x, x), (np.nan, np.nan))
  4355. assert_array_almost_equal(stats.ttest_rel(x, y, nan_policy='omit'),
  4356. (0.25299925303978066, 0.8003729814201519))
  4357. assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='raise')
  4358. assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='foobar')
  4359. # test zero division problem
  4360. with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
  4361. t, p = stats.ttest_rel([0, 0, 0], [1, 1, 1])
  4362. assert_equal((np.abs(t), p), (np.inf, 0))
  4363. with np.errstate(invalid="ignore"):
  4364. assert_equal(stats.ttest_rel([0, 0, 0], [0, 0, 0]), (np.nan, np.nan))
  4365. # check that nan in input array result in nan output
  4366. anan = np.array([[1, np.nan], [-1, 1]])
  4367. assert_equal(stats.ttest_rel(anan, np.zeros((2, 2))),
  4368. ([0, np.nan], [1, np.nan]))
  4369. # test incorrect input shape raise an error
  4370. x = np.arange(24)
  4371. assert_raises(ValueError, stats.ttest_rel, x.reshape((8, 3)),
  4372. x.reshape((2, 3, 4)))
  4373. # Convert from two-sided p-values to one sided using T result data.
  4374. def convert(t, p, alt):
  4375. if (t < 0 and alt == "less") or (t > 0 and alt == "greater"):
  4376. return p / 2
  4377. return 1 - (p / 2)
  4378. converter = np.vectorize(convert)
  4379. rvs1_2D[:, 20:30] = np.nan
  4380. rvs2_2D[:, 15:25] = np.nan
  4381. with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
  4382. tr, pr = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, nan_policy='omit')
  4383. with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
  4384. t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0,
  4385. nan_policy='omit', alternative='less')
  4386. assert_allclose(t, tr, rtol=1e-14)
  4387. with np.errstate(invalid='ignore'):
  4388. assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14)
  4389. with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
  4390. t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0,
  4391. nan_policy='omit', alternative='greater')
  4392. assert_allclose(t, tr, rtol=1e-14)
  4393. with np.errstate(invalid='ignore'):
  4394. assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14)
  4395. def test_ttest_rel_nan_2nd_arg():
  4396. # regression test for gh-6134: nans in the second arg were not handled
  4397. x = [np.nan, 2.0, 3.0, 4.0]
  4398. y = [1.0, 2.0, 1.0, 2.0]
  4399. r1 = stats.ttest_rel(x, y, nan_policy='omit')
  4400. r2 = stats.ttest_rel(y, x, nan_policy='omit')
  4401. assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
  4402. assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)
  4403. # NB: arguments are paired when NaNs are dropped
  4404. r3 = stats.ttest_rel(y[1:], x[1:])
  4405. assert_allclose(r2, r3, atol=1e-15)
  4406. # .. and this is consistent with R. R code:
  4407. # x = c(NA, 2.0, 3.0, 4.0)
  4408. # y = c(1.0, 2.0, 1.0, 2.0)
  4409. # t.test(x, y, paired=TRUE)
  4410. assert_allclose(r2, (-2, 0.1835), atol=1e-4)
  4411. def test_ttest_rel_empty_1d_returns_nan():
  4412. # Two empty inputs should return a TtestResult containing nan
  4413. # for both values.
  4414. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  4415. result = stats.ttest_rel([], [])
  4416. assert isinstance(result, stats._stats_py.TtestResult)
  4417. assert_equal(result, (np.nan, np.nan))
  4418. @pytest.mark.parametrize('b, expected_shape',
  4419. [(np.empty((1, 5, 0)), (3, 5)),
  4420. (np.empty((1, 0, 0)), (3, 0))])
  4421. def test_ttest_rel_axis_size_zero(b, expected_shape):
  4422. # In this test, the length of the axis dimension is zero.
  4423. # The results should be arrays containing nan with shape
  4424. # given by the broadcast nonaxis dimensions.
  4425. a = np.empty((3, 1, 0))
  4426. with warnings.catch_warnings():
  4427. # first case should warn, second shouldn't?
  4428. warnings.filterwarnings("ignore", too_small_nd_not_omit, SmallSampleWarning)
  4429. result = stats.ttest_rel(a, b, axis=-1)
  4430. assert isinstance(result, stats._stats_py.TtestResult)
  4431. expected_value = np.full(expected_shape, fill_value=np.nan)
  4432. assert_equal(result.statistic, expected_value)
  4433. assert_equal(result.pvalue, expected_value)
  4434. def test_ttest_rel_nonaxis_size_zero():
  4435. # In this test, the length of the axis dimension is nonzero,
  4436. # but one of the nonaxis dimensions has length 0. Check that
  4437. # we still get the correctly broadcast shape, which is (5, 0)
  4438. # in this case.
  4439. a = np.empty((1, 8, 0))
  4440. b = np.empty((5, 8, 1))
  4441. result = stats.ttest_rel(a, b, axis=1)
  4442. assert isinstance(result, stats._stats_py.TtestResult)
  4443. assert_equal(result.statistic.shape, (5, 0))
  4444. assert_equal(result.pvalue.shape, (5, 0))
  4445. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  4446. def test_ttest_rel_ci_1d(alternative):
  4447. # test confidence interval method against reference values
  4448. rng = np.random.default_rng(3749065329432213059)
  4449. n = 10
  4450. x = rng.normal(size=n, loc=1.5, scale=2)
  4451. y = rng.normal(size=n, loc=2, scale=2)
  4452. # Reference values generated with R t.test:
  4453. # options(digits=16)
  4454. # x = c(1.22825792, 1.63950485, 4.39025641, 0.68609437, 2.03813481,
  4455. # -1.20040109, 1.81997937, 1.86854636, 2.94694282, 3.94291373)
  4456. # y = c(3.49961496, 1.53192536, 5.53620083, 2.91687718, 0.04858043,
  4457. # 3.78505943, 3.3077496 , 2.30468892, 3.42168074, 0.56797592)
  4458. # t.test(x, y, paired=TRUE, conf.level=0.85, alternative='l')
  4459. ref = {'two-sided': [-1.912194489914035, 0.400169725914035],
  4460. 'greater': [-1.563944820311475, np.inf],
  4461. 'less': [-np.inf, 0.05192005631147523]}
  4462. res = stats.ttest_rel(x, y, alternative=alternative)
  4463. ci = res.confidence_interval(confidence_level=0.85)
  4464. assert_allclose(ci, ref[alternative])
  4465. assert_equal(res.df, n-1)
  4466. @pytest.mark.parametrize("test_fun, args",
  4467. [(stats.ttest_1samp, (np.arange(10), 0)),
  4468. (stats.ttest_rel, (np.arange(10), np.arange(10)))])
  4469. def test_ttest_ci_iv(test_fun, args):
  4470. # test `confidence_interval` method input validation
  4471. res = test_fun(*args)
  4472. message = '`confidence_level` must be a number between 0 and 1.'
  4473. with pytest.raises(ValueError, match=message):
  4474. res.confidence_interval(confidence_level=10)
  4475. def _desc_stats(x1, x2, axis=0, *, xp=None):
  4476. xp = array_namespace(x1, x2) if xp is None else xp
  4477. def _stats(x, axis=0):
  4478. x = xp.asarray(x)
  4479. mu = xp.mean(x, axis=axis)
  4480. std = xp.std(x, axis=axis, correction=1)
  4481. nobs = x.shape[axis]
  4482. return mu, std, nobs
  4483. return _stats(x1, axis) + _stats(x2, axis)
  4484. @make_xp_test_case(stats.ttest_ind, stats.ttest_ind_from_stats)
  4485. def test_ttest_ind(xp):
  4486. # regression test
  4487. tr = xp.asarray(1.0912746897927283)
  4488. pr = xp.asarray(0.27647818616351882)
  4489. tr_2D = xp.stack([tr, -tr])
  4490. pr_2D = xp.stack([pr, pr])
  4491. rvs1 = xp.linspace(5, 105, 100)
  4492. rvs2 = xp.linspace(1, 100, 100)
  4493. rvs1_2D = xp.stack([rvs1, rvs2])
  4494. rvs2_2D = xp.stack([rvs2, rvs1])
  4495. res = stats.ttest_ind(rvs1, rvs2, axis=0)
  4496. t, p = res # check that result object can be unpacked
  4497. xp_assert_close(t, tr)
  4498. xp_assert_close(p, pr)
  4499. res = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2))
  4500. t, p = res # check that result object can be unpacked
  4501. xp_assert_close(t, tr)
  4502. xp_assert_close(p, pr)
  4503. res = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0)
  4504. xp_assert_close(res.statistic, tr_2D)
  4505. xp_assert_close(res.pvalue, pr_2D)
  4506. res = stats.ttest_ind_from_stats(*_desc_stats(rvs1_2D.T, rvs2_2D.T))
  4507. xp_assert_close(res.statistic, tr_2D)
  4508. xp_assert_close(res.pvalue, pr_2D)
  4509. res = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1)
  4510. xp_assert_close(res.statistic, tr_2D)
  4511. xp_assert_close(res.pvalue, pr_2D)
  4512. res = stats.ttest_ind_from_stats(*_desc_stats(rvs1_2D, rvs2_2D, axis=1))
  4513. xp_assert_close(res.statistic, tr_2D)
  4514. xp_assert_close(res.pvalue, pr_2D)
  4515. # test on 3 dimensions removed because generic tests in
  4516. # test_axis_nan_policy are much stronger
  4517. # test alternative parameter
  4518. message = "`alternative` must be 'less', 'greater', or 'two-sided'."
  4519. with pytest.raises(ValueError, match=message):
  4520. stats.ttest_ind(rvs1, rvs2, alternative = "error")
  4521. args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
  4522. with pytest.raises(ValueError, match=message):
  4523. stats.ttest_ind_from_stats(*args, alternative = "error")
  4524. t, p = stats.ttest_ind(rvs1, rvs2, alternative="less")
  4525. xp_assert_close(p, 1 - (pr/2))
  4526. xp_assert_close(t, tr)
  4527. t, p = stats.ttest_ind(rvs1, rvs2, alternative="greater")
  4528. xp_assert_close(p, pr/2)
  4529. xp_assert_close(t, tr)
  4530. # Check that ttest_ind_from_stats agrees with ttest_ind
  4531. res1 = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="less")
  4532. args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
  4533. res2 = stats.ttest_ind_from_stats(*args, alternative="less")
  4534. xp_assert_close(res1.statistic, res2.statistic)
  4535. xp_assert_close(res1.pvalue, res2.pvalue)
  4536. res1 = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="less")
  4537. args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
  4538. res2 = stats.ttest_ind_from_stats(*args, alternative="less")
  4539. xp_assert_close(res1.statistic, res2.statistic)
  4540. xp_assert_close(res1.pvalue, res2.pvalue)
  4541. # test NaNs
  4542. NaN = xp.asarray(xp.nan)
  4543. rvs1 = xp.where(xp.arange(rvs1.shape[0]) == 0, NaN, rvs1)
  4544. res = stats.ttest_ind(rvs1, rvs2, axis=0)
  4545. xp_assert_equal(res.statistic, NaN)
  4546. xp_assert_equal(res.pvalue, NaN)
  4547. res = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2))
  4548. xp_assert_equal(res.statistic, NaN)
  4549. xp_assert_equal(res.pvalue, NaN)
  4550. def test_ttest_ind_nan_policy():
  4551. rvs1 = np.linspace(5, 105, 100)
  4552. rvs2 = np.linspace(1, 100, 100)
  4553. rvs1_2D = np.array([rvs1, rvs2])
  4554. rvs2_2D = np.array([rvs2, rvs1])
  4555. rvs1_3D = np.dstack([rvs1_2D, rvs1_2D, rvs1_2D])
  4556. rvs2_3D = np.dstack([rvs2_2D, rvs2_2D, rvs2_2D])
  4557. # check nan policy
  4558. rng = np.random.RandomState(12345678)
  4559. x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng)
  4560. x[500] = np.nan
  4561. y = stats.norm.rvs(loc=5, scale=10, size=500, random_state=rng)
  4562. with np.errstate(invalid="ignore"):
  4563. assert_array_equal(stats.ttest_ind(x, y), (np.nan, np.nan))
  4564. assert_array_almost_equal(stats.ttest_ind(x, y, nan_policy='omit'),
  4565. (0.24779670949091914, 0.80434267337517906))
  4566. assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='raise')
  4567. assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='foobar')
  4568. # test zero division problem
  4569. with pytest.warns(RuntimeWarning, match="Precision loss occurred"):
  4570. t, p = stats.ttest_ind([0, 0, 0], [1, 1, 1])
  4571. assert_equal((np.abs(t), p), (np.inf, 0))
  4572. with np.errstate(invalid="ignore"):
  4573. assert_equal(stats.ttest_ind([0, 0, 0], [0, 0, 0]), (np.nan, np.nan))
  4574. # check that nan in input array result in nan output
  4575. anan = np.array([[1, np.nan], [-1, 1]])
  4576. assert_equal(stats.ttest_ind(anan, np.zeros((2, 2))),
  4577. ([0, np.nan], [1, np.nan]))
  4578. rvs1_3D[:, :, 10:15] = np.nan
  4579. rvs2_3D[:, :, 6:12] = np.nan
  4580. # Convert from two-sided p-values to one sided using T result data.
  4581. def convert(t, p, alt):
  4582. if (t < 0 and alt == "less") or (t > 0 and alt == "greater"):
  4583. return p / 2
  4584. return 1 - (p / 2)
  4585. converter = np.vectorize(convert)
  4586. tr, pr = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit')
  4587. t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit',
  4588. alternative='less')
  4589. assert_allclose(t, tr, rtol=1e-14)
  4590. assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14)
  4591. t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit',
  4592. alternative='greater')
  4593. assert_allclose(t, tr, rtol=1e-14)
  4594. assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14)
  4595. def test_ttest_ind_scalar():
  4596. # test scalars
  4597. with warnings.catch_warnings(), np.errstate(invalid="ignore"):
  4598. warnings.filterwarnings(
  4599. "ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
  4600. t, p = stats.ttest_ind(4., 3.)
  4601. assert np.isnan(t)
  4602. assert np.isnan(p)
  4603. @pytest.mark.filterwarnings("ignore:Arguments...:DeprecationWarning")
  4604. class Test_ttest_ind_permutations:
  4605. N = 20
  4606. # data for most tests
  4607. rng = np.random.default_rng(169708062)
  4608. a = np.vstack((np.arange(3*N//4), rng.random(3*N//4)))
  4609. b = np.vstack((np.arange(N//4) + 100, rng.random(N//4)))
  4610. # data for equal variance tests
  4611. a2 = np.arange(10)
  4612. b2 = np.arange(10) + 100
  4613. # data for exact test
  4614. a3 = [1, 2]
  4615. b3 = [3, 4]
  4616. # data for bigger test
  4617. rvs1 = stats.norm.rvs(loc=5, scale=10, # type: ignore
  4618. size=500, random_state=rng).reshape(100, 5).T
  4619. rvs2 = stats.norm.rvs(loc=8, scale=20, size=100, random_state=rng) # type: ignore
  4620. p_d = [1/1001, (676+1)/1001] # desired pvalues
  4621. p_d_gen = [1/1001, (672 + 1)/1001] # desired pvalues for Generator seed
  4622. p_d_big = [(993+1)/1001, (685+1)/1001, (840+1)/1001,
  4623. (955+1)/1001, (255+1)/1001]
  4624. params = [
  4625. (a, b, {"axis": 1}, p_d), # basic test
  4626. (a.T, b.T, {'axis': 0}, p_d), # along axis 0
  4627. (a[0, :], b[0, :], {'axis': None}, p_d[0]), # 1d data
  4628. (a[0, :].tolist(), b[0, :].tolist(), {'axis': None}, p_d[0]),
  4629. # different seeds
  4630. (a, b, {'random_state': 0, "axis": 1}, p_d),
  4631. (a, b, {'random_state': np.random.RandomState(0), "axis": 1}, p_d),
  4632. (a2, b2, {'equal_var': True}, 1/1001), # equal variances
  4633. (rvs1, rvs2, {'axis': -1, 'random_state': 0}, p_d_big), # bigger test
  4634. (a3, b3, {}, 1/3), # exact test
  4635. (a, b, {'random_state': np.random.default_rng(0), "axis": 1}, p_d_gen),
  4636. ]
  4637. @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
  4638. @pytest.mark.parametrize("shape", [(12,), (2, 12)])
  4639. def test_permutation_method(self, alternative, shape):
  4640. rng = np.random.default_rng(2348934579834565)
  4641. x = rng.random(size=shape)
  4642. y = rng.random(size=13)
  4643. kwargs = dict(n_resamples=999)
  4644. # Use ttest_ind with `method`
  4645. rng = np.random.default_rng(348934579834565)
  4646. method = stats.PermutationMethod(rng=rng, **kwargs)
  4647. res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method)
  4648. # Use `permutation_test` directly
  4649. def statistic(x, y, axis): return stats.ttest_ind(x, y, axis=axis).statistic
  4650. rng = np.random.default_rng(348934579834565)
  4651. ref = stats.permutation_test((x, y), statistic, axis=-1, rng=rng,
  4652. alternative=alternative, **kwargs)
  4653. assert_equal(res.statistic, ref.statistic)
  4654. assert_equal(res.pvalue, ref.pvalue)
  4655. # Sanity check against theoretical t-test
  4656. ref = stats.ttest_ind(x, y, axis=-1, alternative=alternative)
  4657. assert_equal(res.statistic, ref.statistic)
  4658. assert_allclose(res.pvalue, ref.pvalue, rtol=3e-2)
  4659. @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
  4660. @pytest.mark.parametrize("shape", [(12,), (2, 12)])
  4661. def test_monte_carlo_method(self, alternative, shape):
  4662. rng = np.random.default_rng(2348934579834565)
  4663. x = rng.random(size=shape)
  4664. y = rng.random(size=13)
  4665. kwargs = dict(n_resamples=999)
  4666. # Use `monte_carlo` directly
  4667. def statistic(x, y, axis): return stats.ttest_ind(x, y, axis=axis).statistic
  4668. rng = np.random.default_rng(348934579834565)
  4669. rvs = [rng.standard_normal, rng.standard_normal]
  4670. ref = stats.monte_carlo_test((x, y), rvs=rvs, statistic=statistic, axis=-1,
  4671. alternative=alternative, **kwargs)
  4672. # Use ttest_ind with `method`
  4673. rng = np.random.default_rng(348934579834565)
  4674. rvs = [rng.standard_normal, rng.standard_normal]
  4675. method = stats.MonteCarloMethod(rvs=rvs, **kwargs)
  4676. res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method)
  4677. assert_equal(res.statistic, ref.statistic)
  4678. assert_equal(res.pvalue, ref.pvalue)
  4679. # Passing `rng` instead of `rvs`
  4680. method = stats.MonteCarloMethod(rng=348934579834565, **kwargs)
  4681. res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method)
  4682. assert_equal(res.statistic, ref.statistic)
  4683. assert_equal(res.pvalue, ref.pvalue)
  4684. # Sanity check against theoretical t-test
  4685. ref = stats.ttest_ind(x, y, axis=-1, alternative=alternative)
  4686. assert_equal(res.statistic, ref.statistic)
  4687. assert_allclose(res.pvalue, ref.pvalue, rtol=6e-2)
  4688. def test_resampling_input_validation(self):
  4689. message = "`method` must be an instance of `PermutationMethod`, an instance..."
  4690. with pytest.raises(ValueError, match=message):
  4691. stats.ttest_ind([1, 2, 3], [4, 5, 6], method='migratory')
  4692. class Test_ttest_ind_common:
  4693. # for tests that are performed on variations of the t-test (e.g. trimmed)
  4694. @pytest.mark.xslow()
  4695. @pytest.mark.parametrize("kwds", [{'trim': .2}, {}],
  4696. ids=["trim", "basic"])
  4697. @pytest.mark.parametrize('equal_var', [True, False],
  4698. ids=['equal_var', 'unequal_var'])
  4699. def test_ttest_many_dims(self, kwds, equal_var):
  4700. # Test that test works on many-dimensional arrays
  4701. rng = np.random.default_rng(3815288136)
  4702. a = rng.random((5, 4, 4, 7, 1, 6))
  4703. b = rng.random((4, 1, 8, 2, 6))
  4704. res = stats.ttest_ind(a, b, axis=-3, **kwds)
  4705. # compare fully-vectorized t-test against t-test on smaller slice
  4706. i, j, k = 2, 3, 1
  4707. a2 = a[i, :, j, :, 0, :]
  4708. b2 = b[:, 0, :, k, :]
  4709. res2 = stats.ttest_ind(a2, b2, axis=-2, **kwds)
  4710. assert_equal(res.statistic[i, :, j, k, :],
  4711. res2.statistic)
  4712. assert_equal(res.pvalue[i, :, j, k, :],
  4713. res2.pvalue)
  4714. # compare against t-test on one axis-slice at a time
  4715. # manually broadcast with tile; move axis to end to simplify
  4716. x = np.moveaxis(np.tile(a, (1, 1, 1, 1, 2, 1)), -3, -1)
  4717. y = np.moveaxis(np.tile(b, (5, 1, 4, 1, 1, 1)), -3, -1)
  4718. shape = x.shape[:-1]
  4719. statistics = np.zeros(shape)
  4720. pvalues = np.zeros(shape)
  4721. for indices in product(*(range(i) for i in shape)):
  4722. xi = x[indices] # use tuple to index single axis slice
  4723. yi = y[indices]
  4724. res3 = stats.ttest_ind(xi, yi, axis=-1, **kwds)
  4725. statistics[indices] = res3.statistic
  4726. pvalues[indices] = res3.pvalue
  4727. assert_allclose(statistics, res.statistic)
  4728. assert_allclose(pvalues, res.pvalue)
  4729. @pytest.mark.parametrize("kwds", [{'trim': .2}, {}],
  4730. ids=["trim", "basic"])
  4731. @pytest.mark.parametrize("axis", [-1, 0])
  4732. def test_nans_on_axis(self, kwds, axis):
  4733. # confirm that with `nan_policy='propagate'`, NaN results are returned
  4734. # on the correct location
  4735. rng = np.random.default_rng(363836384995579937222)
  4736. a = rng.integers(10, size=(5, 3, 10)).astype('float')
  4737. b = rng.integers(10, size=(5, 3, 10)).astype('float')
  4738. # set some indices in `a` and `b` to be `np.nan`.
  4739. a[0][2][3] = np.nan
  4740. b[2][0][6] = np.nan
  4741. # arbitrarily use `np.sum` as a baseline for which indices should be
  4742. # NaNs
  4743. expected = np.isnan(np.sum(a + b, axis=axis))
  4744. # multidimensional inputs to `t.sf(np.abs(t), df)` with NaNs on some
  4745. # indices throws an warning. See issue gh-13844
  4746. with warnings.catch_warnings(), np.errstate(invalid="ignore"):
  4747. warnings.filterwarnings(
  4748. "ignore", "invalid value encountered in less_equal", RuntimeWarning)
  4749. warnings.filterwarnings("ignore", "Precision loss occurred", RuntimeWarning)
  4750. res = stats.ttest_ind(a, b, axis=axis, **kwds)
  4751. p_nans = np.isnan(res.pvalue)
  4752. assert_array_equal(p_nans, expected)
  4753. statistic_nans = np.isnan(res.statistic)
  4754. assert_array_equal(statistic_nans, expected)
  4755. class Test_ttest_trim:
  4756. params = [
  4757. [[1, 2, 3], [1.1, 2.9, 4.2], 0.53619490753126731, -0.6864951273557258,
  4758. .2],
  4759. [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2],
  4760. 0.00998909252078421, 4.591598691181999, .2],
  4761. [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2],
  4762. 0.10512380092302633, 2.832256715395378, .32],
  4763. [[2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9],
  4764. [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1],
  4765. 0.002878909511344, -4.2461168970325, .2],
  4766. [[-0.84504783, 0.13366078, 3.53601757, -0.62908581, 0.54119466,
  4767. -1.16511574, -0.08836614, 1.18495416, 2.48028757, -1.58925028,
  4768. -1.6706357, 0.3090472, -2.12258305, 0.3697304, -1.0415207,
  4769. -0.57783497, -0.90997008, 1.09850192, 0.41270579, -1.4927376],
  4770. [1.2725522, 1.1657899, 2.7509041, 1.2389013, -0.9490494, -1.0752459,
  4771. 1.1038576, 2.9912821, 3.5349111, 0.4171922, 1.0168959, -0.7625041,
  4772. -0.4300008, 3.0431921, 1.6035947, 0.5285634, -0.7649405, 1.5575896,
  4773. 1.3670797, 1.1726023], 0.005293305834235, -3.0983317739483, .2]]
  4774. @pytest.mark.parametrize("a,b,pr,tr,trim", params)
  4775. def test_ttest_compare_r(self, a, b, pr, tr, trim):
  4776. '''
  4777. Using PairedData's yuen.t.test method. Something to note is that there
  4778. are at least 3 R packages that come with a trimmed t-test method, and
  4779. comparisons were made between them. It was found that PairedData's
  4780. method's results match this method, SAS, and one of the other R
  4781. methods. A notable discrepancy was the DescTools implementation of the
  4782. function, which only sometimes agreed with SAS, WRS2, PairedData and
  4783. this implementation. For this reason, most comparisons in R are made
  4784. against PairedData's method.
  4785. Rather than providing the input and output for all evaluations, here is
  4786. a representative example:
  4787. > library(PairedData)
  4788. > a <- c(1, 2, 3)
  4789. > b <- c(1.1, 2.9, 4.2)
  4790. > options(digits=16)
  4791. > yuen.t.test(a, b, tr=.2)
  4792. Two-sample Yuen test, trim=0.2
  4793. data: x and y
  4794. t = -0.68649512735573, df = 3.4104431643464, p-value = 0.5361949075313
  4795. alternative hypothesis: true difference in trimmed means is not equal
  4796. to 0
  4797. 95 percent confidence interval:
  4798. -3.912777195645217 2.446110528978550
  4799. sample estimates:
  4800. trimmed mean of x trimmed mean of y
  4801. 2.000000000000000 2.73333333333333
  4802. '''
  4803. statistic, pvalue = stats.ttest_ind(a, b, trim=trim, equal_var=False)
  4804. assert_allclose(statistic, tr, atol=1e-15)
  4805. assert_allclose(pvalue, pr, atol=1e-15)
  4806. def test_compare_SAS(self):
  4807. # Source of the data used in this test:
  4808. # https://support.sas.com/resources/papers/proceedings14/1660-2014.pdf
  4809. a = [12, 14, 18, 25, 32, 44, 12, 14, 18, 25, 32, 44]
  4810. b = [17, 22, 14, 12, 30, 29, 19, 17, 22, 14, 12, 30, 29, 19]
  4811. # In this paper, a trimming percentage of 5% is used. However,
  4812. # in their implementation, the number of values trimmed is rounded to
  4813. # the nearest whole number. However, consistent with
  4814. # `scipy.stats.trimmed_mean`, this test truncates to the lower
  4815. # whole number. In this example, the paper notes that 1 value is
  4816. # trimmed off of each side. 9% replicates this amount of trimming.
  4817. statistic, pvalue = stats.ttest_ind(a, b, trim=.09, equal_var=False)
  4818. assert_allclose(pvalue, 0.514522, atol=1e-6)
  4819. assert_allclose(statistic, 0.669169, atol=1e-6)
  4820. def test_equal_var(self):
  4821. '''
  4822. The PairedData library only supports unequal variances. To compare
  4823. samples with equal variances, the multicon library is used.
  4824. > library(multicon)
  4825. > a <- c(2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9)
  4826. > b <- c(6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1)
  4827. > dv = c(a,b)
  4828. > iv = c(rep('a', length(a)), rep('b', length(b)))
  4829. > yuenContrast(dv~ iv, EQVAR = TRUE)
  4830. $Ms
  4831. N M wgt
  4832. a 11 2.442857142857143 1
  4833. b 11 5.385714285714286 -1
  4834. $test
  4835. stat df crit p
  4836. results -4.246116897032513 12 2.178812829667228 0.00113508833897713
  4837. '''
  4838. a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9]
  4839. b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1]
  4840. # `equal_var=True` is default
  4841. statistic, pvalue = stats.ttest_ind(a, b, trim=.2)
  4842. assert_allclose(pvalue, 0.00113508833897713, atol=1e-10)
  4843. assert_allclose(statistic, -4.246116897032513, atol=1e-10)
  4844. @pytest.mark.parametrize('alt,pr,tr',
  4845. (('greater', 0.9985605452443, -4.2461168970325),
  4846. ('less', 0.001439454755672, -4.2461168970325),),
  4847. )
  4848. def test_alternatives(self, alt, pr, tr):
  4849. '''
  4850. > library(PairedData)
  4851. > a <- c(2.7,2.7,1.1,3.0,1.9,3.0,3.8,3.8,0.3,1.9,1.9)
  4852. > b <- c(6.5,5.4,8.1,3.5,0.5,3.8,6.8,4.9,9.5,6.2,4.1)
  4853. > options(digits=16)
  4854. > yuen.t.test(a, b, alternative = 'greater')
  4855. '''
  4856. a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9]
  4857. b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1]
  4858. statistic, pvalue = stats.ttest_ind(a, b, trim=.2, equal_var=False,
  4859. alternative=alt)
  4860. assert_allclose(pvalue, pr, atol=1e-10)
  4861. assert_allclose(statistic, tr, atol=1e-10)
  4862. @skip_xp_backends(cpu_only=True, reason='Uses NumPy for pvalue, CI')
  4863. def test_permutation_not_implement_for_xp(self, xp):
  4864. message = "Use of `trim` is compatible only with NumPy arrays."
  4865. a, b = xp.arange(10), xp.arange(10)+1
  4866. if is_numpy(xp): # no error
  4867. stats.ttest_ind(a, b, trim=0.1)
  4868. else: # NotImplementedError
  4869. with pytest.raises(NotImplementedError, match=message):
  4870. stats.ttest_ind(a, b, trim=0.1)
  4871. @pytest.mark.parametrize("trim", [-.2, .5, 1])
  4872. def test_trim_bounds_error(self, trim):
  4873. match = "Trimming percentage should be 0 <= `trim` < .5."
  4874. with assert_raises(ValueError, match=match):
  4875. stats.ttest_ind([1, 2], [2, 1], trim=trim)
  4876. @make_xp_test_case(stats.ttest_ind)
  4877. class Test_ttest_CI:
  4878. # indices in order [alternative={two-sided, less, greater},
  4879. # equal_var={False, True}, trim={0, 0.2}]
  4880. # reference values in order `statistic, df, pvalue, low, high`
  4881. # equal_var=False reference values computed with R PairedData yuen.t.test:
  4882. #
  4883. # library(PairedData)
  4884. # options(digits=16)
  4885. # a < - c(0.88236329, 0.97318744, 0.4549262, 0.97893335, 0.0606677,
  4886. # 0.44013366, 0.55806018, 0.40151434, 0.14453315, 0.25860601,
  4887. # 0.20202162)
  4888. # b < - c(0.93455277, 0.42680603, 0.49751939, 0.14152846, 0.711435,
  4889. # 0.77669667, 0.20507578, 0.78702772, 0.94691855, 0.32464958,
  4890. # 0.3873582, 0.35187468, 0.21731811)
  4891. # yuen.t.test(a, b, tr=0, conf.level = 0.9, alternative = 'l')
  4892. #
  4893. # equal_var=True reference values computed with R multicon yuenContrast:
  4894. #
  4895. # library(multicon)
  4896. # options(digits=16)
  4897. # a < - c(0.88236329, 0.97318744, 0.4549262, 0.97893335, 0.0606677,
  4898. # 0.44013366, 0.55806018, 0.40151434, 0.14453315, 0.25860601,
  4899. # 0.20202162)
  4900. # b < - c(0.93455277, 0.42680603, 0.49751939, 0.14152846, 0.711435,
  4901. # 0.77669667, 0.20507578, 0.78702772, 0.94691855, 0.32464958,
  4902. # 0.3873582, 0.35187468, 0.21731811)
  4903. # dv = c(a, b)
  4904. # iv = c(rep('a', length(a)), rep('b', length(b)))
  4905. # yuenContrast(dv~iv, EQVAR = FALSE, alternative = 'unequal', tr = 0.2)
  4906. r = np.empty(shape=(3, 2, 2, 5))
  4907. r[0, 0, 0] = [-0.2314607, 19.894435, 0.8193209, -0.247220294, 0.188729943]
  4908. r[1, 0, 0] = [-0.2314607, 19.894435, 0.40966045, -np.inf, 0.1382426469]
  4909. r[2, 0, 0] = [-0.2314607, 19.894435, 0.5903395, -0.1967329982, np.inf]
  4910. r[0, 0, 1] = [-0.2452886, 11.427896, 0.8105823, -0.34057446, 0.25847383]
  4911. r[1, 0, 1] = [-0.2452886, 11.427896, 0.40529115, -np.inf, 0.1865829074]
  4912. r[2, 0, 1] = [-0.2452886, 11.427896, 0.5947089, -0.268683541, np.inf]
  4913. # confidence interval not available for equal_var=True
  4914. r[0, 1, 0] = [-0.2345625322555006, 22, 0.8167175905643815, np.nan, np.nan]
  4915. r[1, 1, 0] = [-0.2345625322555006, 22, 0.4083587952821908, np.nan, np.nan]
  4916. r[2, 1, 0] = [-0.2345625322555006, 22, 0.5916412047178092, np.nan, np.nan]
  4917. r[0, 1, 1] = [-0.2505369406507428, 14, 0.8058115135702835, np.nan, np.nan]
  4918. r[1, 1, 1] = [-0.2505369406507428, 14, 0.4029057567851417, np.nan, np.nan]
  4919. r[2, 1, 1] = [-0.2505369406507428, 14, 0.5970942432148583, np.nan, np.nan]
  4920. @pytest.mark.parametrize('alternative', ['two-sided', 'less', 'greater'])
  4921. @pytest.mark.parametrize('equal_var', [False, True])
  4922. @pytest.mark.parametrize('trim', [0, 0.2])
  4923. @skip_xp_backends('jax.numpy', reason='Generic stdtrit mutates array.')
  4924. def test_confidence_interval(self, alternative, equal_var, trim, xp):
  4925. if equal_var and trim:
  4926. pytest.xfail('Discrepancy in `main`; needs further investigation.')
  4927. if trim and not is_numpy(xp):
  4928. pytest.skip('`trim` is only compatible with NumPy input')
  4929. rng = np.random.default_rng(3810954496107292580)
  4930. x = xp.asarray(rng.random(11))
  4931. y = xp.asarray(rng.random(13))
  4932. res = stats.ttest_ind(x, y, alternative=alternative,
  4933. equal_var=equal_var, trim=trim)
  4934. alternatives = {'two-sided': 0, 'less': 1, 'greater': 2}
  4935. ref = self.r[alternatives[alternative], int(equal_var), int(np.ceil(trim))]
  4936. statistic, df, pvalue, low, high = ref
  4937. rtol = 1e-7 # only 7 digits in reference
  4938. xp_assert_close(res.statistic, xp.asarray(statistic), rtol=rtol)
  4939. xp_assert_close(res.df, xp.asarray(df), rtol=rtol)
  4940. xp_assert_close(res.pvalue, xp.asarray(pvalue), rtol=rtol)
  4941. if not equal_var: # CI not available when `equal_var is True`
  4942. ci = res.confidence_interval(0.9)
  4943. xp_assert_close(ci.low, xp.asarray(low), rtol=rtol)
  4944. xp_assert_close(ci.high, xp.asarray(high), rtol=rtol)
  4945. def test__broadcast_concatenate():
  4946. # test that _broadcast_concatenate properly broadcasts arrays along all
  4947. # axes except `axis`, then concatenates along axis
  4948. rng = np.random.default_rng(7544340069)
  4949. a = rng.random((5, 4, 4, 3, 1, 6))
  4950. b = rng.random((4, 1, 8, 2, 6))
  4951. c = _broadcast_concatenate((a, b), axis=-3)
  4952. # broadcast manually as an independent check
  4953. a = np.tile(a, (1, 1, 1, 1, 2, 1))
  4954. b = np.tile(b[None, ...], (5, 1, 4, 1, 1, 1))
  4955. for index in product(*(range(i) for i in c.shape)):
  4956. i, j, k, l, m, n = index
  4957. if l < a.shape[-3]:
  4958. assert a[i, j, k, l, m, n] == c[i, j, k, l, m, n]
  4959. else:
  4960. assert b[i, j, k, l - a.shape[-3], m, n] == c[i, j, k, l, m, n]
  4961. @make_xp_test_case(stats.ttest_ind)
  4962. class TestTTestInd:
  4963. @make_xp_test_case(stats.ttest_ind_from_stats)
  4964. def test_ttest_ind_with_uneq_var(self, xp):
  4965. # check vs. R `t.test`, e.g.
  4966. # options(digits=20)
  4967. # a = c(1., 2., 3.)
  4968. # b = c(1.1, 2.9, 4.2)
  4969. # t.test(a, b, equal.var=FALSE)
  4970. a = xp.asarray([1., 2., 3.])
  4971. b = xp.asarray([1.1, 2.9, 4.2])
  4972. pr = xp.asarray(0.53619490753126686)
  4973. tr = xp.asarray(-0.686495127355726265)
  4974. t, p = stats.ttest_ind(a, b, equal_var=False)
  4975. xp_assert_close(t, tr)
  4976. xp_assert_close(p, pr)
  4977. t, p = stats.ttest_ind_from_stats(*_desc_stats(a, b), equal_var=False)
  4978. xp_assert_close(t, tr)
  4979. xp_assert_close(p, pr)
  4980. a = xp.asarray([1., 2., 3., 4.])
  4981. pr = xp.asarray(0.84354139131608252)
  4982. tr = xp.asarray(-0.210866331595072315)
  4983. t, p = stats.ttest_ind(a, b, equal_var=False)
  4984. xp_assert_close(t, tr)
  4985. xp_assert_close(p, pr)
  4986. t, p = stats.ttest_ind_from_stats(*_desc_stats(a, b), equal_var=False)
  4987. xp_assert_close(t, tr)
  4988. xp_assert_close(p, pr)
  4989. # regression test
  4990. tr = xp.asarray(1.0912746897927283)
  4991. tr_uneq_n = xp.asarray(0.66745638708050492)
  4992. pr = xp.asarray(0.27647831993021388)
  4993. pr_uneq_n = xp.asarray(0.50873585065616544)
  4994. tr_2D = xp.stack([tr, -tr])
  4995. pr_2D = xp.stack([pr, pr])
  4996. rvs3 = xp.linspace(1, 100, 25)
  4997. rvs2 = xp.linspace(1, 100, 100)
  4998. rvs1 = xp.linspace(5, 105, 100)
  4999. rvs1_2D = xp.stack([rvs1, rvs2])
  5000. rvs2_2D = xp.stack([rvs2, rvs1])
  5001. t, p = stats.ttest_ind(rvs1, rvs2, axis=0, equal_var=False)
  5002. xp_assert_close(t, tr)
  5003. xp_assert_close(p, pr)
  5004. t, p = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2), equal_var=False)
  5005. xp_assert_close(t, tr)
  5006. xp_assert_close(p, pr)
  5007. t, p = stats.ttest_ind(rvs1, rvs3, axis=0, equal_var=False)
  5008. xp_assert_close(t, tr_uneq_n)
  5009. xp_assert_close(p, pr_uneq_n)
  5010. t, p = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs3), equal_var=False)
  5011. xp_assert_close(t, tr_uneq_n)
  5012. xp_assert_close(p, pr_uneq_n)
  5013. res = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, equal_var=False)
  5014. xp_assert_close(res.statistic, tr_2D)
  5015. xp_assert_close(res.pvalue, pr_2D)
  5016. args = _desc_stats(rvs1_2D.T, rvs2_2D.T)
  5017. res = stats.ttest_ind_from_stats(*args, equal_var=False)
  5018. xp_assert_close(res.statistic, tr_2D)
  5019. xp_assert_close(res.pvalue, pr_2D)
  5020. res = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1, equal_var=False)
  5021. xp_assert_close(res.statistic, tr_2D)
  5022. xp_assert_close(res.pvalue, pr_2D)
  5023. args = _desc_stats(rvs1_2D, rvs2_2D, axis=1)
  5024. res = stats.ttest_ind_from_stats(*args, equal_var=False)
  5025. xp_assert_close(res.statistic, tr_2D)
  5026. xp_assert_close(res.pvalue, pr_2D)
  5027. @pytest.mark.filterwarnings(
  5028. "ignore:divide by zero encountered:RuntimeWarning"
  5029. ) # for dask
  5030. @pytest.mark.filterwarnings(
  5031. "ignore:invalid value encountered:RuntimeWarning"
  5032. ) # for dask
  5033. def test_ttest_ind_zero_division(self, xp):
  5034. # test zero division problem
  5035. x = xp.zeros(3)
  5036. y = xp.ones(3)
  5037. with eager_warns(RuntimeWarning, match="Precision loss occurred", xp=xp):
  5038. t, p = stats.ttest_ind(x, y, equal_var=False)
  5039. xp_assert_equal(t, xp.asarray(-xp.inf))
  5040. xp_assert_equal(p, xp.asarray(0.))
  5041. with np.errstate(all='ignore'):
  5042. t, p = stats.ttest_ind(x, x, equal_var=False)
  5043. xp_assert_equal(t, xp.asarray(xp.nan))
  5044. xp_assert_equal(p, xp.asarray(xp.nan))
  5045. # check that nan in input array result in nan output
  5046. anan = xp.asarray([[1, xp.nan], [-1, 1]])
  5047. t, p = stats.ttest_ind(anan, xp.zeros((2, 2)), equal_var=False)
  5048. xp_assert_equal(t, xp.asarray([0., np.nan]))
  5049. xp_assert_equal(p, xp.asarray([1., np.nan]))
  5050. def test_ttest_ind_nan_2nd_arg(self):
  5051. # regression test for gh-6134: nans in the second arg were not handled
  5052. x = [np.nan, 2.0, 3.0, 4.0]
  5053. y = [1.0, 2.0, 1.0, 2.0]
  5054. r1 = stats.ttest_ind(x, y, nan_policy='omit')
  5055. r2 = stats.ttest_ind(y, x, nan_policy='omit')
  5056. assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
  5057. assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)
  5058. # NB: arguments are not paired when NaNs are dropped
  5059. r3 = stats.ttest_ind(y, x[1:])
  5060. assert_allclose(r2, r3, atol=1e-15)
  5061. # .. and this is consistent with R. R code:
  5062. # x = c(NA, 2.0, 3.0, 4.0)
  5063. # y = c(1.0, 2.0, 1.0, 2.0)
  5064. # t.test(x, y, var.equal=TRUE)
  5065. assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901),
  5066. atol=1e-15)
  5067. def test_ttest_ind_empty_1d_returns_nan(self, xp):
  5068. # Two empty inputs should return a TtestResult containing nan
  5069. # for both values.
  5070. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  5071. res = stats.ttest_ind(xp.asarray([]), xp.asarray([]))
  5072. assert isinstance(res, stats._stats_py.TtestResult)
  5073. NaN = xp.asarray(xp.nan)[()]
  5074. xp_assert_equal(res.statistic, NaN)
  5075. xp_assert_equal(res.pvalue, NaN)
  5076. @pytest.mark.parametrize('b, expected_shape',
  5077. [(np.empty((1, 5, 0)), (3, 5)),
  5078. (np.empty((1, 0, 0)), (3, 0))])
  5079. def test_ttest_ind_axis_size_zero(self, b, expected_shape, xp):
  5080. # In this test, the length of the axis dimension is zero.
  5081. # The results should be arrays containing nan with shape
  5082. # given by the broadcast nonaxis dimensions.
  5083. a = xp.empty((3, 1, 0))
  5084. b = xp.asarray(b, dtype=a.dtype)
  5085. with warnings.catch_warnings():
  5086. # first case should warn, second shouldn't?
  5087. warnings.filterwarnings("ignore", too_small_nd_not_omit, SmallSampleWarning)
  5088. res = stats.ttest_ind(a, b, axis=-1)
  5089. assert isinstance(res, stats._stats_py.TtestResult)
  5090. expected_value = xp.full(expected_shape, fill_value=xp.nan)
  5091. xp_assert_equal(res.statistic, expected_value)
  5092. xp_assert_equal(res.pvalue, expected_value)
  5093. def test_ttest_ind_nonaxis_size_zero(self, xp):
  5094. # In this test, the length of the axis dimension is nonzero,
  5095. # but one of the nonaxis dimensions has length 0. Check that
  5096. # we still get the correctly broadcast shape, which is (5, 0)
  5097. # in this case.
  5098. a = xp.empty((1, 8, 0))
  5099. b = xp.empty((5, 8, 1))
  5100. res = stats.ttest_ind(a, b, axis=1)
  5101. assert isinstance(res, stats._stats_py.TtestResult)
  5102. assert res.statistic.shape ==(5, 0)
  5103. assert res.pvalue.shape == (5, 0)
  5104. def test_ttest_ind_nonaxis_size_zero_different_lengths(self, xp):
  5105. # In this test, the length of the axis dimension is nonzero,
  5106. # and that size is different in the two inputs,
  5107. # and one of the nonaxis dimensions has length 0. Check that
  5108. # we still get the correctly broadcast shape, which is (5, 0)
  5109. # in this case.
  5110. a = xp.empty((1, 7, 0))
  5111. b = xp.empty((5, 8, 1))
  5112. res = stats.ttest_ind(a, b, axis=1)
  5113. assert isinstance(res, stats._stats_py.TtestResult)
  5114. assert res.statistic.shape ==(5, 0)
  5115. assert res.pvalue.shape == (5, 0)
  5116. @make_xp_test_case(stats.ttest_ind_from_stats)
  5117. class TestTTestIndFromStats:
  5118. @pytest.mark.skip_xp_backends(np_only=True,
  5119. reason="Other backends don't like integers")
  5120. def test_gh5686(self, xp):
  5121. mean1, mean2 = xp.asarray([1, 2]), xp.asarray([3, 4])
  5122. std1, std2 = xp.asarray([5, 3]), xp.asarray([4, 5])
  5123. nobs1, nobs2 = xp.asarray([130, 140]), xp.asarray([100, 150])
  5124. # This will raise a TypeError unless gh-5686 is fixed.
  5125. stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
  5126. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
  5127. def test_ttest_ind_from_stats_inputs_zero(self, xp):
  5128. # Regression test for gh-6409.
  5129. zero = xp.asarray(0.)
  5130. six = xp.asarray(6.)
  5131. NaN = xp.asarray(xp.nan)
  5132. res = stats.ttest_ind_from_stats(zero, zero, six, zero, zero, six,
  5133. equal_var=False)
  5134. xp_assert_equal(res.statistic, NaN)
  5135. xp_assert_equal(res.pvalue, NaN)
  5136. @pytest.mark.skip_xp_backends(cpu_only=True, reason='Test uses ks_1samp')
  5137. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5138. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5139. def test_ttest_uniform_pvalues(xp):
  5140. # test that p-values are uniformly distributed under the null hypothesis
  5141. rng = np.random.default_rng(246834602926842)
  5142. x = xp.asarray(rng.normal(size=(10000, 2)))
  5143. y = xp.asarray(rng.normal(size=(10000, 1)))
  5144. q = rng.uniform(size=100)
  5145. res = stats.ttest_ind(x, y, equal_var=True, axis=-1)
  5146. pvalue = np.asarray(res.pvalue)
  5147. assert stats.ks_1samp(pvalue, stats.uniform().cdf).pvalue > 0.1
  5148. assert_allclose(np.quantile(pvalue, q), q, atol=1e-2)
  5149. res = stats.ttest_ind(y, x, equal_var=True, axis=-1)
  5150. pvalue = np.asarray(res.pvalue)
  5151. assert stats.ks_1samp(pvalue, stats.uniform().cdf).pvalue > 0.1
  5152. assert_allclose(np.quantile(pvalue, q), q, atol=1e-2)
  5153. # reference values from R:
  5154. # options(digits=16)
  5155. # t.test(c(2, 3, 5), c(1.5), var.equal=TRUE)
  5156. x, y = xp.asarray([2., 3., 5.]), xp.asarray([1.5])
  5157. res = stats.ttest_ind(x, y, equal_var=True)
  5158. xp_assert_close(res.statistic, xp.asarray(1.0394023007754))
  5159. xp_assert_close(res.pvalue, xp.asarray(0.407779907736))
  5160. def _convert_pvalue_alternative(t, p, alt, xp):
  5161. # test alternative parameter
  5162. # Convert from two-sided p-values to one sided using T result data.
  5163. less = xp.asarray(alt == "less")
  5164. greater = xp.asarray(alt == "greater")
  5165. i = ((t < 0) & less) | ((t > 0) & greater)
  5166. return xp.where(i, p/2, 1 - p/2)
  5167. @pytest.mark.slow
  5168. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5169. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5170. @make_xp_test_case(stats.ttest_1samp)
  5171. def test_ttest_1samp_new(xp):
  5172. rng = np.random.default_rng(88123290)
  5173. n1, n2, n3 = (10, 15, 20)
  5174. rvn1 = stats.norm.rvs(loc=5, scale=10, size=(n1, n2, n3), random_state=rng)
  5175. rvn1 = xp.asarray(rvn1)
  5176. # check multidimensional array and correct axis handling
  5177. # deterministic rvn1 and rvn2 would be better as in test_ttest_rel
  5178. popmean = xp.ones((1, n2, n3))
  5179. t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=0)
  5180. t2, p2 = stats.ttest_1samp(rvn1, 1., axis=0)
  5181. t3, p3 = stats.ttest_1samp(rvn1[:, 0, 0], 1.)
  5182. xp_assert_close(t1, t2, rtol=1e-14)
  5183. xp_assert_close(t1[0, 0], t3, rtol=1e-14)
  5184. assert_equal(t1.shape, (n2, n3))
  5185. popmean = xp.ones((n1, 1, n3))
  5186. t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=1)
  5187. t2, p2 = stats.ttest_1samp(rvn1, 1., axis=1)
  5188. t3, p3 = stats.ttest_1samp(rvn1[0, :, 0], 1.)
  5189. xp_assert_close(t1, t2, rtol=1e-14)
  5190. xp_assert_close(t1[0, 0], t3, rtol=1e-14)
  5191. assert_equal(t1.shape, (n1, n3))
  5192. popmean = xp.ones((n1, n2, 1))
  5193. t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=2)
  5194. t2, p2 = stats.ttest_1samp(rvn1, 1., axis=2)
  5195. t3, p3 = stats.ttest_1samp(rvn1[0, 0, :], 1.)
  5196. xp_assert_close(t1, t2, rtol=1e-14)
  5197. xp_assert_close(t1[0, 0], t3, rtol=1e-14)
  5198. assert_equal(t1.shape, (n1, n2))
  5199. # test zero division problem
  5200. t, p = stats.ttest_1samp(xp.asarray([0., 0., 0.]), 1.)
  5201. xp_assert_equal(xp.abs(t), xp.asarray(xp.inf))
  5202. xp_assert_equal(p, xp.asarray(0.))
  5203. tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1.)
  5204. t, p = stats.ttest_1samp(rvn1[:, :, :], 1., alternative="greater")
  5205. pc = _convert_pvalue_alternative(tr, pr, "greater", xp)
  5206. xp_assert_close(p, pc)
  5207. xp_assert_close(t, tr)
  5208. t, p = stats.ttest_1samp(rvn1[:, :, :], 1., alternative="less")
  5209. pc = _convert_pvalue_alternative(tr, pr, "less", xp)
  5210. xp_assert_close(p, pc)
  5211. xp_assert_close(t, tr)
  5212. with np.errstate(all='ignore'):
  5213. res = stats.ttest_1samp(xp.asarray([0., 0., 0.]), 0.)
  5214. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  5215. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  5216. # check that nan in input array result in nan output
  5217. anan = xp.asarray([[1., np.nan], [-1., 1.]])
  5218. res = stats.ttest_1samp(anan, 0.)
  5219. xp_assert_equal(res.statistic, xp.asarray([0., xp.nan]))
  5220. xp_assert_equal(res.pvalue, xp.asarray([1., xp.nan]))
  5221. @skip_xp_backends(np_only=True, reason="Only NumPy has nan_policy='omit' for now")
  5222. def test_ttest_1samp_new_omit(xp):
  5223. rng = np.random.default_rng(4008400329)
  5224. n1, n2, n3 = (5, 10, 15)
  5225. rvn1 = stats.norm.rvs(loc=5, scale=10, size=(n1, n2, n3), random_state=rng)
  5226. rvn1 = xp.asarray(rvn1)
  5227. rvn1[0:2, 1:3, 4:8] = xp.nan
  5228. tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit')
  5229. t, p = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit',
  5230. alternative="greater")
  5231. pc = _convert_pvalue_alternative(tr, pr, "greater", xp)
  5232. xp_assert_close(p, pc)
  5233. xp_assert_close(t, tr)
  5234. t, p = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit',
  5235. alternative="less")
  5236. pc = _convert_pvalue_alternative(tr, pr, "less", xp)
  5237. xp_assert_close(p, pc)
  5238. xp_assert_close(t, tr)
  5239. @make_xp_test_case(stats.ttest_1samp)
  5240. @pytest.mark.skip_xp_backends('jax.numpy', reason='Generic stdtrit mutates array.')
  5241. def test_ttest_1samp_popmean_array(xp):
  5242. # when popmean.shape[axis] != 1, raise an error
  5243. # if the user wants to test multiple null hypotheses simultaneously,
  5244. # use standard broadcasting rules
  5245. rng = np.random.default_rng(2913300596553337193)
  5246. x = rng.random(size=(1, 15, 20))
  5247. x = xp.asarray(x)
  5248. message = r"`popmean.shape\[axis\]` must equal 1."
  5249. popmean = xp.asarray(rng.random(size=(5, 2, 20)))
  5250. with pytest.raises(ValueError, match=message):
  5251. stats.ttest_1samp(x, popmean=popmean, axis=-2)
  5252. popmean = xp.asarray(rng.random(size=(5, 1, 20)))
  5253. res = stats.ttest_1samp(x, popmean=popmean, axis=-2)
  5254. assert res.statistic.shape == (5, 20)
  5255. l, u = res.confidence_interval()
  5256. l = xp.expand_dims(l, axis=-2)
  5257. u = xp.expand_dims(u, axis=-2)
  5258. res = stats.ttest_1samp(x, popmean=l, axis=-2)
  5259. ref = xp.broadcast_to(xp.asarray(0.05, dtype=xp.float64), res.pvalue.shape)
  5260. xp_assert_close(res.pvalue, ref)
  5261. res = stats.ttest_1samp(x, popmean=u, axis=-2)
  5262. xp_assert_close(res.pvalue, ref)
  5263. @make_xp_test_case(stats.describe)
  5264. class TestDescribe:
  5265. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5266. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5267. def test_describe_scalar(self, xp):
  5268. with warnings.catch_warnings(), \
  5269. np.errstate(invalid="ignore", divide="ignore"):
  5270. warnings.filterwarnings(
  5271. "ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
  5272. n, mm, m, v, sk, kurt = stats.describe(xp.asarray(4.)[()])
  5273. assert n == 1
  5274. xp_assert_equal(mm[0], xp.asarray(4.0))
  5275. xp_assert_equal(mm[1], xp.asarray(4.0))
  5276. xp_assert_equal(m, xp.asarray(4.0))
  5277. xp_assert_equal(v ,xp.asarray(xp.nan))
  5278. xp_assert_equal(sk, xp.asarray(xp.nan))
  5279. xp_assert_equal(kurt, xp.asarray(xp.nan))
  5280. def test_describe_numbers(self, xp):
  5281. x = xp.concat((xp.ones((3, 4)), xp.full((2, 4), 2.)))
  5282. nc = 5
  5283. mmc = (xp.asarray([1., 1., 1., 1.]), xp.asarray([2., 2., 2., 2.]))
  5284. mc = xp.asarray([1.4, 1.4, 1.4, 1.4])
  5285. vc = xp.asarray([0.3, 0.3, 0.3, 0.3])
  5286. skc = xp.asarray([0.40824829046386357] * 4)
  5287. kurtc = xp.asarray([-1.833333333333333] * 4)
  5288. n, mm, m, v, sk, kurt = stats.describe(x)
  5289. assert n == nc
  5290. xp_assert_equal(mm[0], mmc[0])
  5291. xp_assert_equal(mm[1], mmc[1])
  5292. xp_assert_close(m, mc, rtol=4 * xp.finfo(m.dtype).eps)
  5293. xp_assert_close(v, vc, rtol=4 * xp.finfo(m.dtype).eps)
  5294. xp_assert_close(sk, skc)
  5295. xp_assert_close(kurt, kurtc)
  5296. n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1)
  5297. assert n == nc
  5298. xp_assert_equal(mm[0], mmc[0])
  5299. xp_assert_equal(mm[1], mmc[1])
  5300. xp_assert_close(m, mc, rtol=4 * xp.finfo(m.dtype).eps)
  5301. xp_assert_close(v, vc, rtol=4 * xp.finfo(m.dtype).eps)
  5302. xp_assert_close(sk, skc)
  5303. xp_assert_close(kurt, kurtc)
  5304. def describe_nan_policy_omit_test(self):
  5305. x = np.arange(10.)
  5306. x[9] = np.nan
  5307. nc, mmc = (9, (0.0, 8.0))
  5308. mc = 4.0
  5309. vc = 7.5
  5310. skc = 0.0
  5311. kurtc = -1.2300000000000002
  5312. n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit')
  5313. assert_equal(n, nc)
  5314. assert_equal(mm, mmc)
  5315. assert_equal(m, mc)
  5316. assert_equal(v, vc)
  5317. assert_array_almost_equal(sk, skc)
  5318. assert_array_almost_equal(kurt, kurtc, decimal=13)
  5319. def test_describe_nan_policy_other(self, xp):
  5320. x = xp.arange(10.)
  5321. x = xp.where(x==9, xp.nan, x)
  5322. if is_lazy_array(x):
  5323. with pytest.raises(TypeError, match='not supported for lazy arrays'):
  5324. stats.describe(x, nan_policy='raise')
  5325. else:
  5326. with pytest.raises(ValueError, match='The input contains nan values'):
  5327. stats.describe(x, nan_policy='raise')
  5328. n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='propagate')
  5329. ref = xp.asarray(xp.nan)[()]
  5330. assert n == 10
  5331. xp_assert_equal(mm[0], ref)
  5332. xp_assert_equal(mm[1], ref)
  5333. xp_assert_equal(m, ref)
  5334. xp_assert_equal(v, ref)
  5335. xp_assert_equal(sk, ref)
  5336. xp_assert_equal(kurt, ref)
  5337. if is_numpy(xp):
  5338. self.describe_nan_policy_omit_test()
  5339. elif is_lazy_array(x):
  5340. with pytest.raises(TypeError, match='not supported for lazy arrays'):
  5341. stats.describe(x, nan_policy='omit')
  5342. message = 'nan_policy must be one of...'
  5343. with pytest.raises(ValueError, match=message):
  5344. stats.describe(x, nan_policy='foobar')
  5345. def test_describe_result_attributes(self):
  5346. # some result attributes are tuples, which aren't meant to be compared
  5347. # with `xp_assert_close`
  5348. actual = stats.describe(np.arange(5.))
  5349. attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness', 'kurtosis')
  5350. check_named_results(actual, attributes)
  5351. def test_describe_ddof(self, xp):
  5352. x = xp.concat((xp.ones((3, 4)), xp.full((2, 4), 2.)))
  5353. nc = 5
  5354. mmc = (xp.asarray([1., 1., 1., 1.]), xp.asarray([2., 2., 2., 2.]))
  5355. mc = xp.asarray([1.4, 1.4, 1.4, 1.4])
  5356. vc = xp.asarray([0.24, 0.24, 0.24, 0.24])
  5357. skc = xp.asarray([0.40824829046386357] * 4)
  5358. kurtc = xp.asarray([-1.833333333333333] * 4)
  5359. n, mm, m, v, sk, kurt = stats.describe(x, ddof=0)
  5360. assert n == nc
  5361. xp_assert_equal(mm[0], mmc[0])
  5362. xp_assert_equal(mm[1], mmc[1])
  5363. xp_assert_close(m, mc)
  5364. xp_assert_close(v, vc)
  5365. xp_assert_close(sk, skc)
  5366. xp_assert_close(kurt, kurtc)
  5367. def test_describe_axis_none(self, xp):
  5368. x = xp.concat((xp.ones((3, 4)), xp.full((2, 4), 2.)))
  5369. # expected values
  5370. nc = 20
  5371. mmc = (xp.asarray(1.0), xp.asarray(2.0))
  5372. mc = xp.asarray(1.3999999999999999)
  5373. vc = xp.asarray(0.25263157894736848)
  5374. skc = xp.asarray(0.4082482904638634)
  5375. kurtc = xp.asarray(-1.8333333333333333)
  5376. # actual values
  5377. n, mm, m, v, sk, kurt = stats.describe(x, axis=None)
  5378. assert n == nc
  5379. xp_assert_equal(mm[0], mmc[0])
  5380. xp_assert_equal(mm[1], mmc[1])
  5381. xp_assert_close(m, mc)
  5382. xp_assert_close(v, vc)
  5383. xp_assert_close(sk, skc)
  5384. xp_assert_close(kurt, kurtc)
  5385. def test_describe_empty(self, xp):
  5386. message = "The input must not be empty."
  5387. with pytest.raises(ValueError, match=message):
  5388. stats.describe(xp.asarray([]))
  5389. class NormalityTests:
  5390. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  5391. def test_against_R(self, alternative, xp):
  5392. # testa against R `dagoTest` from package `fBasics`
  5393. # library(fBasics)
  5394. # options(digits=16)
  5395. # x = c(-2, -1, 0, 1, 2, 3)**2
  5396. # x = rep(x, times=4)
  5397. # test_result <- dagoTest(x)
  5398. # test_result@test$statistic
  5399. # test_result@test$p.value
  5400. test_name = self.test_name
  5401. test_fun = getattr(stats, test_name)
  5402. ref_statistic= xp.asarray(self.case_ref[0])
  5403. ref_pvalue = xp.asarray(self.case_ref[1])
  5404. kwargs = {}
  5405. if alternative in {'less', 'greater'}:
  5406. if test_name in {'skewtest', 'kurtosistest'}:
  5407. ref_pvalue = ref_pvalue/2 if alternative == "less" else 1-ref_pvalue/2
  5408. ref_pvalue = 1-ref_pvalue if test_name == 'skewtest' else ref_pvalue
  5409. kwargs['alternative'] = alternative
  5410. else:
  5411. pytest.skip('`alternative` not available for `normaltest`')
  5412. x = xp.asarray((-2, -1, 0, 1, 2, 3.)*4)**2
  5413. res = test_fun(x, **kwargs)
  5414. res_statistic, res_pvalue = res
  5415. xp_assert_close(res_statistic, ref_statistic)
  5416. xp_assert_close(res_pvalue, ref_pvalue)
  5417. check_named_results(res, ('statistic', 'pvalue'), xp=xp)
  5418. def test_nan(self, xp):
  5419. # nan in input -> nan output (default nan_policy='propagate')
  5420. test_fun = getattr(stats, self.test_name)
  5421. x = xp.arange(30.)
  5422. NaN = xp.asarray(xp.nan, dtype=x.dtype)
  5423. x = xp.where(x == 29, NaN, x)
  5424. with np.errstate(invalid="ignore"):
  5425. res = test_fun(x)
  5426. xp_assert_equal(res.statistic, NaN)
  5427. xp_assert_equal(res.pvalue, NaN)
  5428. @make_xp_test_case(stats.skewtest)
  5429. class TestSkewTest(NormalityTests):
  5430. test_name = 'skewtest'
  5431. case_ref = (1.98078826090875881, 0.04761502382843208) # statistic, pvalue
  5432. def test_intuitive(self, xp):
  5433. # intuitive tests; see gh-13549. skewnorm with parameter 1 has skew > 0
  5434. a1 = stats.skewnorm.rvs(a=1, size=10000, random_state=123)
  5435. a1_xp = xp.asarray(a1)
  5436. pval = stats.skewtest(a1_xp, alternative='greater').pvalue
  5437. xp_assert_close(pval, xp.asarray(0.0, dtype=a1_xp.dtype), atol=9e-6)
  5438. def test_skewtest_too_few_observations(self, xp):
  5439. # Regression test for ticket #1492.
  5440. # skewtest requires at least 8 observations; 7 should warn and return NaN.
  5441. stats.skewtest(xp.arange(8.0))
  5442. x = xp.arange(7.0)
  5443. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  5444. res = stats.skewtest(x)
  5445. NaN = xp.asarray(xp.nan)
  5446. xp_assert_equal(res.statistic, NaN)
  5447. xp_assert_equal(res.pvalue, NaN)
  5448. @make_xp_test_case(stats.kurtosistest)
  5449. class TestKurtosisTest(NormalityTests):
  5450. test_name = 'kurtosistest'
  5451. case_ref = (-0.01403734404759738, 0.98880018772590561) # statistic, pvalue
  5452. def test_intuitive(self, xp):
  5453. # intuitive tests; see gh-13549. excess kurtosis of laplace is 3 > 0
  5454. a2 = stats.laplace.rvs(size=10000, random_state=123)
  5455. a2_xp = xp.asarray(a2)
  5456. pval = stats.kurtosistest(a2_xp, alternative='greater').pvalue
  5457. xp_assert_close(pval, xp.asarray(0.0, dtype=a2_xp.dtype), atol=1e-15)
  5458. def test_gh9033_regression(self, xp):
  5459. # regression test for issue gh-9033: x clearly non-normal but power of
  5460. # negative denom needs to be handled correctly to reject normality
  5461. counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167]
  5462. x = np.hstack([np.full(c, i) for i, c in enumerate(counts)])
  5463. x = xp.asarray(x, dtype=xp.float64)
  5464. assert stats.kurtosistest(x)[1] < 0.01
  5465. def test_kurtosistest_too_few_observations(self, xp):
  5466. # kurtosistest requires at least 5 observations; 4 should warn and return NaN.
  5467. # Regression test for ticket #1425.
  5468. stats.kurtosistest(xp.arange(5.0))
  5469. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  5470. res = stats.kurtosistest(xp.arange(4.))
  5471. NaN = xp.asarray(xp.nan)
  5472. xp_assert_equal(res.statistic, NaN)
  5473. xp_assert_equal(res.pvalue, NaN)
  5474. @make_xp_test_case(stats.normaltest)
  5475. class TestNormalTest(NormalityTests):
  5476. test_name = 'normaltest'
  5477. case_ref = (3.92371918158185551, 0.14059672529747502) # statistic, pvalue
  5478. def test_too_few_observations(self, xp):
  5479. stats.normaltest(xp.arange(8.))
  5480. # 1D sample has too few observations -> warning / NaN output
  5481. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  5482. res = stats.normaltest(xp.arange(7.))
  5483. NaN = xp.asarray(xp.nan)
  5484. xp_assert_equal(res.statistic, NaN)
  5485. xp_assert_equal(res.pvalue, NaN)
  5486. class TestRankSums:
  5487. rng = np.random.default_rng(3417115752)
  5488. x, y = rng.random((2, 10))
  5489. @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
  5490. def test_ranksums_result_attributes(self, alternative):
  5491. # ranksums pval = mannwhitneyu pval w/out continuity or tie correction
  5492. res1 = stats.ranksums(self.x, self.y,
  5493. alternative=alternative).pvalue
  5494. res2 = stats.mannwhitneyu(self.x, self.y, use_continuity=False,
  5495. alternative=alternative).pvalue
  5496. assert_allclose(res1, res2)
  5497. def test_ranksums_named_results(self):
  5498. res = stats.ranksums(self.x, self.y)
  5499. check_named_results(res, ('statistic', 'pvalue'))
  5500. def test_input_validation(self):
  5501. with assert_raises(ValueError, match="`alternative` must be 'less'"):
  5502. stats.ranksums(self.x, self.y, alternative='foobar')
  5503. @make_xp_test_case(stats.jarque_bera)
  5504. class TestJarqueBera:
  5505. def test_jarque_bera_against_R(self, xp):
  5506. # library(tseries)
  5507. # options(digits=16)
  5508. # x < - rnorm(5)
  5509. # jarque.bera.test(x)
  5510. x = [-0.160104223201523288, 1.131262000934478040, -0.001235254523709458,
  5511. -0.776440091309490987, -2.072959999533182884]
  5512. x = xp.asarray(x)
  5513. ref = xp.asarray([0.17651605223752, 0.9155246169805])
  5514. res = stats.jarque_bera(x)
  5515. xp_assert_close(res.statistic, ref[0])
  5516. xp_assert_close(res.pvalue, ref[1])
  5517. @skip_xp_backends(np_only=True)
  5518. def test_jarque_bera_array_like(self, xp):
  5519. # array-like only relevant for NumPy
  5520. rng = np.random.default_rng(9294968266)
  5521. x = rng.standard_normal(size=100000)
  5522. jb_test1 = JB1, p1 = stats.jarque_bera(list(x))
  5523. jb_test2 = JB2, p2 = stats.jarque_bera(tuple(x))
  5524. jb_test3 = JB3, p3 = stats.jarque_bera(x.reshape(2, 50000))
  5525. assert JB1 == JB2 == JB3 == jb_test1.statistic == jb_test2.statistic == jb_test3.statistic # noqa: E501
  5526. assert p1 == p2 == p3 == jb_test1.pvalue == jb_test2.pvalue == jb_test3.pvalue
  5527. @skip_xp_backends('array_api_strict', reason='Noisy; see TestSkew')
  5528. def test_jarque_bera_too_few_observations(self, xp):
  5529. x = xp.asarray([])
  5530. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  5531. res = stats.jarque_bera(x)
  5532. NaN = xp.asarray(xp.nan)
  5533. xp_assert_equal(res.statistic, NaN)
  5534. xp_assert_equal(res.pvalue, NaN)
  5535. def test_axis(self, xp):
  5536. rng = np.random.RandomState(seed=122398129)
  5537. x = xp.asarray(rng.random(size=(2, 45)))
  5538. res = stats.jarque_bera(x, axis=None)
  5539. ref = stats.jarque_bera(xp.reshape(x, (-1,)))
  5540. xp_assert_equal(res.statistic, ref.statistic)
  5541. xp_assert_equal(res.pvalue, ref.pvalue)
  5542. res = stats.jarque_bera(x, axis=1)
  5543. s0, p0 = stats.jarque_bera(x[0, :])
  5544. s1, p1 = stats.jarque_bera(x[1, :])
  5545. xp_assert_close(res.statistic, xp.stack([s0, s1]))
  5546. xp_assert_close(res.pvalue, xp.stack([p0, p1]))
  5547. resT = stats.jarque_bera(x.T, axis=0)
  5548. xp_assert_close(res.statistic, resT.statistic)
  5549. xp_assert_close(res.pvalue, resT.pvalue)
  5550. def test_pointbiserial():
  5551. # same as mstats test except for the nan
  5552. # Test data: https://web.archive.org/web/20060504220742/https://support.sas.com/ctx/samples/index.jsp?sid=490&tab=output
  5553. x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
  5554. 0,0,0,0,1]
  5555. y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
  5556. 2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
  5557. 0.8,0.7,0.6,0.5,0.2,0.2,0.1]
  5558. assert_almost_equal(stats.pointbiserialr(x, y)[0], 0.36149, 5)
  5559. # test for namedtuple attribute results
  5560. attributes = ('correlation', 'pvalue')
  5561. res = stats.pointbiserialr(x, y)
  5562. check_named_results(res, attributes)
  5563. assert_equal(res.correlation, res.statistic)
  5564. def test_obrientransform():
  5565. # A couple tests calculated by hand.
  5566. x1 = np.array([0, 2, 4])
  5567. t1 = stats.obrientransform(x1)
  5568. expected = [7, -2, 7]
  5569. assert_allclose(t1[0], expected)
  5570. x2 = np.array([0, 3, 6, 9])
  5571. t2 = stats.obrientransform(x2)
  5572. expected = np.array([30, 0, 0, 30])
  5573. assert_allclose(t2[0], expected)
  5574. # Test two arguments.
  5575. a, b = stats.obrientransform(x1, x2)
  5576. assert_equal(a, t1[0])
  5577. assert_equal(b, t2[0])
  5578. # Test three arguments.
  5579. a, b, c = stats.obrientransform(x1, x2, x1)
  5580. assert_equal(a, t1[0])
  5581. assert_equal(b, t2[0])
  5582. assert_equal(c, t1[0])
  5583. # This is a regression test to check np.var replacement.
  5584. # The author of this test didn't separately verify the numbers.
  5585. x1 = np.arange(5)
  5586. result = np.array(
  5587. [[5.41666667, 1.04166667, -0.41666667, 1.04166667, 5.41666667],
  5588. [21.66666667, 4.16666667, -1.66666667, 4.16666667, 21.66666667]])
  5589. assert_array_almost_equal(stats.obrientransform(x1, 2*x1), result, decimal=8)
  5590. # Example from "O'Brien Test for Homogeneity of Variance"
  5591. # by Herve Abdi.
  5592. values = range(5, 11)
  5593. reps = np.array([5, 11, 9, 3, 2, 2])
  5594. data = np.repeat(values, reps)
  5595. transformed_values = np.array([3.1828, 0.5591, 0.0344,
  5596. 1.6086, 5.2817, 11.0538])
  5597. expected = np.repeat(transformed_values, reps)
  5598. result = stats.obrientransform(data)
  5599. assert_array_almost_equal(result[0], expected, decimal=4)
  5600. def check_equal_xmean(*args, xp, mean_fun, axis=None, dtype=None,
  5601. rtol=1e-7, weights=None):
  5602. # Note this doesn't test when axis is not specified
  5603. dtype = dtype or xp.float64
  5604. if len(args) == 2:
  5605. array_like, desired = args
  5606. else:
  5607. array_like, p, desired = args
  5608. array_like = xp.asarray(array_like, dtype=dtype)
  5609. desired = xp.asarray(desired, dtype=dtype)
  5610. weights = xp.asarray(weights, dtype=dtype) if weights is not None else weights
  5611. args = (array_like,) if len(args) == 2 else (array_like, p)
  5612. x = mean_fun(*args, axis=axis, dtype=dtype, weights=weights)
  5613. xp_assert_close(x, desired, rtol=rtol)
  5614. def check_equal_gmean(*args, **kwargs):
  5615. return check_equal_xmean(*args, mean_fun=stats.gmean, **kwargs)
  5616. def check_equal_hmean(*args, **kwargs):
  5617. return check_equal_xmean(*args, mean_fun=stats.hmean, **kwargs)
  5618. def check_equal_pmean(*args, **kwargs):
  5619. return check_equal_xmean(*args, mean_fun=stats.pmean, **kwargs)
  5620. @make_xp_test_case(stats.hmean)
  5621. class TestHMean:
  5622. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5623. def test_0(self, xp):
  5624. a = [1, 0, 2]
  5625. desired = 0
  5626. check_equal_hmean(a, desired, xp=xp)
  5627. def test_1d(self, xp):
  5628. # Test a 1d case
  5629. a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  5630. desired = 34.1417152147
  5631. check_equal_hmean(a, desired, xp=xp)
  5632. a = [1, 2, 3, 4]
  5633. desired = 4. / (1. / 1 + 1. / 2 + 1. / 3 + 1. / 4)
  5634. check_equal_hmean(a, desired, xp=xp)
  5635. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5636. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5637. def test_1d_with_zero(self, xp):
  5638. a = np.array([1, 0])
  5639. desired = 0.0
  5640. check_equal_hmean(a, desired, xp=xp, rtol=0.0)
  5641. @pytest.mark.filterwarnings(
  5642. "ignore:divide by zero encountered:RuntimeWarning"
  5643. ) # for dask
  5644. def test_1d_with_negative_value(self, xp):
  5645. a = np.array([1, 0, -1])
  5646. message = "The harmonic mean is only defined..."
  5647. with pytest.warns(RuntimeWarning, match=message):
  5648. check_equal_hmean(a, xp.nan, xp=xp, rtol=0.0)
  5649. # Note the next tests use axis=None as default, not axis=0
  5650. def test_2d(self, xp):
  5651. # Test a 2d case
  5652. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5653. desired = 38.6696271841
  5654. check_equal_hmean(np.array(a), desired, xp=xp)
  5655. def test_2d_axis0(self, xp):
  5656. # Test a 2d case with axis=0
  5657. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5658. desired = np.array([22.88135593, 39.13043478, 52.90076336, 65.45454545])
  5659. check_equal_hmean(a, desired, axis=0, xp=xp)
  5660. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5661. def test_2d_axis0_with_zero(self, xp):
  5662. a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5663. desired = np.array([22.88135593, 0.0, 52.90076336, 65.45454545])
  5664. check_equal_hmean(a, desired, axis=0, xp=xp)
  5665. def test_2d_axis1(self, xp):
  5666. # Test a 2d case with axis=1
  5667. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5668. desired = np.array([19.2, 63.03939962, 103.80078637])
  5669. check_equal_hmean(a, desired, axis=1, xp=xp)
  5670. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5671. def test_2d_axis1_with_zero(self, xp):
  5672. a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5673. desired = np.array([0.0, 63.03939962, 103.80078637])
  5674. check_equal_hmean(a, desired, axis=1, xp=xp)
  5675. @skip_xp_backends(
  5676. np_only=True,
  5677. reason='array-likes only supported for NumPy backend',
  5678. )
  5679. def test_weights_1d_list(self, xp):
  5680. # Desired result from:
  5681. # https://www.hackmath.net/en/math-problem/35871
  5682. a = [2, 10, 6]
  5683. weights = [10, 5, 3]
  5684. desired = 3.
  5685. # all the other tests use `check_equal_hmean`, which now converts
  5686. # the input to an xp-array before calling `hmean`. This time, check
  5687. # that the function still accepts the lists of ints.
  5688. res = stats.hmean(a, weights=weights)
  5689. xp_assert_close(res, np.asarray(desired), rtol=1e-5)
  5690. def test_weights_1d(self, xp):
  5691. # Desired result from:
  5692. # https://www.hackmath.net/en/math-problem/35871
  5693. a = np.asarray([2, 10, 6])
  5694. weights = np.asarray([10, 5, 3])
  5695. desired = 3
  5696. check_equal_hmean(a, desired, weights=weights, rtol=1e-5, xp=xp)
  5697. def test_weights_2d_axis0(self, xp):
  5698. # Desired result from:
  5699. # https://www.hackmath.net/en/math-problem/35871
  5700. a = np.array([[2, 5], [10, 5], [6, 5]])
  5701. weights = np.array([[10, 1], [5, 1], [3, 1]])
  5702. desired = np.array([3, 5])
  5703. check_equal_hmean(a, desired, axis=0, weights=weights, rtol=1e-5, xp=xp)
  5704. def test_weights_2d_axis1(self, xp):
  5705. # Desired result from:
  5706. # https://www.hackmath.net/en/math-problem/35871
  5707. a = np.array([[2, 10, 6], [7, 7, 7]])
  5708. weights = np.array([[10, 5, 3], [1, 1, 1]])
  5709. desired = np.array([3, 7])
  5710. check_equal_hmean(a, desired, axis=1, weights=weights, rtol=1e-5, xp=xp)
  5711. @skip_xp_invalid_arg
  5712. def test_weights_masked_1d_array(self, xp):
  5713. # Desired result from:
  5714. # https://www.hackmath.net/en/math-problem/35871
  5715. a = np.array([2, 10, 6, 42])
  5716. weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1])
  5717. desired = 3
  5718. xp = np.ma # check_equal_hmean uses xp.asarray; this will preserve the mask
  5719. check_equal_hmean(a, desired, weights=weights, rtol=1e-5,
  5720. dtype=np.float64, xp=xp)
  5721. @make_xp_test_case(stats.gmean)
  5722. class TestGMean:
  5723. @pytest.mark.filterwarnings(
  5724. "ignore:divide by zero encountered in log:RuntimeWarning:dask"
  5725. )
  5726. def test_0(self, xp):
  5727. a = [1, 0, 2]
  5728. desired = 0
  5729. check_equal_gmean(a, desired, xp=xp)
  5730. def test_1d(self, xp):
  5731. # Test a 1d case
  5732. a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  5733. desired = 45.2872868812
  5734. check_equal_gmean(a, desired, xp=xp)
  5735. a = [1, 2, 3, 4]
  5736. desired = power(1 * 2 * 3 * 4, 1. / 4.)
  5737. check_equal_gmean(a, desired, rtol=1e-14, xp=xp)
  5738. a = array([1, 2, 3, 4], float32)
  5739. desired = power(1 * 2 * 3 * 4, 1. / 4.)
  5740. check_equal_gmean(a, desired, dtype=xp.float32, xp=xp)
  5741. # Note the next tests use axis=None as default, not axis=0
  5742. def test_2d(self, xp):
  5743. # Test a 2d case
  5744. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5745. desired = 52.8885199
  5746. check_equal_gmean(a, desired, xp=xp)
  5747. def test_2d_axis0(self, xp):
  5748. # Test a 2d case with axis=0
  5749. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5750. desired = np.array([35.56893304, 49.32424149, 61.3579244, 72.68482371])
  5751. check_equal_gmean(a, desired, axis=0, xp=xp)
  5752. a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
  5753. desired = array([1, 2, 3, 4])
  5754. check_equal_gmean(a, desired, axis=0, rtol=1e-14, xp=xp)
  5755. def test_2d_axis1(self, xp):
  5756. # Test a 2d case with axis=1
  5757. a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]]
  5758. desired = np.array([22.13363839, 64.02171746, 104.40086817])
  5759. check_equal_gmean(a, desired, axis=1, xp=xp)
  5760. a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
  5761. v = power(1 * 2 * 3 * 4, 1. / 4.)
  5762. desired = array([v, v, v])
  5763. check_equal_gmean(a, desired, axis=1, rtol=1e-14, xp=xp)
  5764. def test_large_values(self, xp):
  5765. a = array([1e100, 1e200, 1e300])
  5766. desired = 1e200
  5767. check_equal_gmean(a, desired, rtol=1e-13, xp=xp)
  5768. @pytest.mark.filterwarnings(
  5769. "ignore:divide by zero encountered in log:RuntimeWarning:dask"
  5770. )
  5771. def test_1d_with_0(self, xp):
  5772. # Test a 1d case with zero element
  5773. a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 0]
  5774. desired = 0.0 # due to exp(-inf)=0
  5775. with np.errstate(all='ignore'):
  5776. check_equal_gmean(a, desired, xp=xp)
  5777. @pytest.mark.filterwarnings(
  5778. "ignore:invalid value encountered in log:RuntimeWarning:dask"
  5779. )
  5780. def test_1d_neg(self, xp):
  5781. # Test a 1d case with negative element
  5782. a = [10, 20, 30, 40, 50, 60, 70, 80, 90, -1]
  5783. desired = np.nan # due to log(-1) = nan
  5784. with np.errstate(invalid='ignore'):
  5785. check_equal_gmean(a, desired, xp=xp)
  5786. @skip_xp_backends(
  5787. np_only=True,
  5788. reason='array-likes only supported for NumPy backend',
  5789. )
  5790. def test_weights_1d_list(self, xp):
  5791. # Desired result from:
  5792. # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
  5793. a = [1, 2, 3, 4, 5]
  5794. weights = [2, 5, 6, 4, 3]
  5795. desired = 2.77748
  5796. # all the other tests use `check_equal_gmean`, which now converts
  5797. # the input to an xp-array before calling `gmean`. This time, check
  5798. # that the function still accepts the lists of ints.
  5799. res = stats.gmean(a, weights=weights)
  5800. xp_assert_close(res, np.asarray(desired), rtol=1e-5)
  5801. def test_weights_1d(self, xp):
  5802. # Desired result from:
  5803. # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
  5804. a = np.array([1, 2, 3, 4, 5])
  5805. weights = np.array([2, 5, 6, 4, 3])
  5806. desired = 2.77748
  5807. check_equal_gmean(a, desired, weights=weights, rtol=1e-5, xp=xp)
  5808. @skip_xp_invalid_arg
  5809. def test_weights_masked_1d_array(self, xp):
  5810. # Desired result from:
  5811. # https://www.dummies.com/education/math/business-statistics/how-to-find-the-weighted-geometric-mean-of-a-data-set/
  5812. a = np.array([1, 2, 3, 4, 5, 6])
  5813. weights = np.ma.array([2, 5, 6, 4, 3, 5], mask=[0, 0, 0, 0, 0, 1])
  5814. desired = 2.77748
  5815. xp = np.ma # check_equal_gmean uses xp.asarray; this will preserve the mask
  5816. check_equal_gmean(a, desired, weights=weights, rtol=1e-5,
  5817. dtype=np.float64, xp=xp)
  5818. @make_xp_test_case(stats.pmean)
  5819. class TestPMean:
  5820. def pmean_reference(a, p):
  5821. return (np.sum(a**p) / a.size)**(1/p)
  5822. def wpmean_reference(a, p, weights):
  5823. return (np.sum(weights * a**p) / np.sum(weights))**(1/p)
  5824. def test_bad_exponent(self, xp):
  5825. with pytest.raises(ValueError, match='Power mean only defined for'):
  5826. stats.pmean(xp.asarray([1, 2, 3]), xp.asarray([0]))
  5827. def test_1d(self, xp):
  5828. a, p = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 3.5
  5829. desired = TestPMean.pmean_reference(np.array(a), p)
  5830. check_equal_pmean(a, p, desired, xp=xp)
  5831. a, p = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], -2.5
  5832. desired = TestPMean.pmean_reference(np.array(a), p)
  5833. check_equal_pmean(a, p, desired, xp=xp)
  5834. a, p = [1, 2, 3, 4], 2
  5835. desired = np.sqrt((1**2 + 2**2 + 3**2 + 4**2) / 4)
  5836. check_equal_pmean(a, p, desired, xp=xp)
  5837. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5838. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5839. def test_1d_with_zero(self, xp):
  5840. a, p = np.array([1, 0]), -1
  5841. desired = 0.0
  5842. check_equal_pmean(a, p, desired, rtol=0.0, xp=xp)
  5843. def test_1d_with_negative_value(self, xp):
  5844. a, p = np.array([1, 0, -1]), 1.23
  5845. message = "The power mean is only defined..."
  5846. with pytest.warns(RuntimeWarning, match=message):
  5847. check_equal_pmean(a, p, xp.nan, xp=xp)
  5848. @pytest.mark.parametrize(
  5849. ("a", "p"),
  5850. [([[10, 20], [50, 60], [90, 100]], -0.5),
  5851. (np.array([[10, 20], [50, 60], [90, 100]]), 0.5)]
  5852. )
  5853. def test_2d_axisnone(self, a, p, xp):
  5854. desired = TestPMean.pmean_reference(np.array(a), p)
  5855. check_equal_pmean(a, p, desired, xp=xp)
  5856. @pytest.mark.parametrize(
  5857. ("a", "p"),
  5858. [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5),
  5859. ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)]
  5860. )
  5861. def test_2d_axis0(self, a, p, xp):
  5862. desired = [
  5863. TestPMean.pmean_reference(
  5864. np.array([a[i][j] for i in range(len(a))]), p
  5865. )
  5866. for j in range(len(a[0]))
  5867. ]
  5868. check_equal_pmean(a, p, desired, axis=0, xp=xp)
  5869. @pytest.mark.parametrize(
  5870. ("a", "p"),
  5871. [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5),
  5872. ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)]
  5873. )
  5874. def test_2d_axis1(self, a, p, xp):
  5875. desired = [TestPMean.pmean_reference(np.array(a_), p) for a_ in a]
  5876. check_equal_pmean(a, p, desired, axis=1, xp=xp)
  5877. def test_weights_1d(self, xp):
  5878. a, p = [2, 10, 6], -1.23456789
  5879. weights = [10, 5, 3]
  5880. desired = TestPMean.wpmean_reference(np.array(a), p, weights)
  5881. check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5, xp=xp)
  5882. @skip_xp_backends(
  5883. np_only=True,
  5884. reason='array-likes only supported for NumPy backend',
  5885. )
  5886. def test_weights_1d_list(self, xp):
  5887. a, p = [2, 10, 6], -1.23456789
  5888. weights = [10, 5, 3]
  5889. desired = TestPMean.wpmean_reference(np.array(a), p, weights)
  5890. # all the other tests use `check_equal_pmean`, which now converts
  5891. # the input to an xp-array before calling `pmean`. This time, check
  5892. # that the function still accepts the lists of ints.
  5893. res = stats.pmean(a, p, weights=weights)
  5894. xp_assert_close(res, np.asarray(desired), rtol=1e-5)
  5895. @skip_xp_invalid_arg
  5896. def test_weights_masked_1d_array(self, xp):
  5897. a, p = np.array([2, 10, 6, 42]), 1
  5898. weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1])
  5899. desired = np.average(a, weights=weights)
  5900. xp = np.ma # check_equal_pmean uses xp.asarray; this will preserve the mask
  5901. check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5,
  5902. dtype=np.float64, xp=xp)
  5903. @pytest.mark.parametrize(
  5904. ("axis", "fun_name", "p"),
  5905. [(None, "wpmean_reference", 9.87654321),
  5906. (0, "gmean", 0),
  5907. (1, "hmean", -1)]
  5908. )
  5909. def test_weights_2d(self, axis, fun_name, p, xp):
  5910. if fun_name == 'wpmean_reference':
  5911. def fun(a, axis, weights):
  5912. return TestPMean.wpmean_reference(a, p, weights)
  5913. else:
  5914. fun = getattr(stats, fun_name)
  5915. a = np.array([[2, 5], [10, 5], [6, 5]])
  5916. weights = np.array([[10, 1], [5, 1], [3, 1]])
  5917. desired = fun(a, axis=axis, weights=weights)
  5918. check_equal_pmean(a, p, desired, axis=axis, weights=weights, rtol=1e-5, xp=xp)
  5919. def test_infinite_p_gh23111(self):
  5920. # gh-23111 reported that `pmean` didn't work properly with infinite `p`;
  5921. # check that this raises an appropriate error message
  5922. message = "Power mean only implemented for finite `p`"
  5923. with pytest.raises(NotImplementedError, match=message):
  5924. stats.pmean([2], np.inf)
  5925. @make_xp_test_case(stats.gstd)
  5926. class TestGSTD:
  5927. # must add 1 as `gstd` is only defined for positive values
  5928. array_1d = (np.arange(2 * 3 * 4) + 1).tolist()
  5929. gstd_array_1d = 2.294407613602
  5930. array_3d = np.reshape(array_1d, (2, 3, 4)).tolist()
  5931. def test_1d_array(self, xp):
  5932. gstd_actual = stats.gstd(xp.asarray(self.array_1d))
  5933. xp_assert_close(gstd_actual, xp.asarray(self.gstd_array_1d))
  5934. @skip_xp_backends(np_only=True, reason="Only NumPy supports array-like input")
  5935. def test_1d_numeric_array_like_input(self, xp):
  5936. gstd_actual = stats.gstd(tuple(self.array_1d))
  5937. assert_allclose(gstd_actual, self.gstd_array_1d)
  5938. @skip_xp_invalid_arg
  5939. def test_raises_error_non_numeric_input(self, xp):
  5940. message = "could not convert string to float|The DType..."
  5941. with pytest.raises((ValueError, TypeError), match=message):
  5942. stats.gstd('You cannot take the logarithm of a string.')
  5943. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  5944. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  5945. @pytest.mark.parametrize('bad_value', (0, -1, np.inf, np.nan))
  5946. def test_returns_nan_invalid_value(self, bad_value, xp):
  5947. x = xp.asarray(self.array_1d + [bad_value])
  5948. if np.isfinite(bad_value) and not is_lazy_array(x):
  5949. message = "The geometric standard deviation is only defined..."
  5950. with pytest.warns(RuntimeWarning, match=message):
  5951. res = stats.gstd(x)
  5952. else:
  5953. res = stats.gstd(x)
  5954. xp_assert_equal(res, xp.asarray(np.nan))
  5955. def test_propagates_nan_values(self, xp):
  5956. a = xp.asarray([[1, 1, 1, 16], [xp.nan, 1, 2, 3]])
  5957. gstd_actual = stats.gstd(a, axis=1)
  5958. xp_assert_close(gstd_actual, xp.asarray([4, np.nan]))
  5959. @xfail_xp_backends("jax.numpy", reason="returns subnormal instead of nan")
  5960. def test_ddof_equal_to_number_of_observations(self, xp):
  5961. x = xp.asarray(self.array_1d)
  5962. res = stats.gstd(x, ddof=x.shape[0])
  5963. xp_assert_equal(res, xp.asarray(xp.nan))
  5964. def test_3d_array(self, xp):
  5965. x = xp.asarray(self.array_3d)
  5966. gstd_actual = stats.gstd(x, axis=None)
  5967. ref = xp.asarray(self.gstd_array_1d)
  5968. xp_assert_close(gstd_actual, ref)
  5969. def test_3d_array_axis_type_tuple(self, xp):
  5970. x = xp.asarray(self.array_3d)
  5971. gstd_actual = stats.gstd(x, axis=(1, 2))
  5972. ref = xp.asarray([2.12939215, 1.22120169])
  5973. xp_assert_close(gstd_actual, ref)
  5974. def test_3d_array_axis_0(self, xp):
  5975. x = xp.asarray(self.array_3d)
  5976. gstd_actual = stats.gstd(x, axis=0)
  5977. gstd_desired = xp.asarray([
  5978. [6.1330555493918, 3.958900210120, 3.1206598248344, 2.6651441426902],
  5979. [2.3758135028411, 2.174581428192, 2.0260062829505, 1.9115518327308],
  5980. [1.8205343606803, 1.746342404566, 1.6846557065742, 1.6325269194382]
  5981. ])
  5982. xp_assert_close(gstd_actual, gstd_desired)
  5983. def test_3d_array_axis_1(self, xp):
  5984. x = xp.asarray(self.array_3d)
  5985. gstd_actual = stats.gstd(x, axis=1)
  5986. gstd_desired = xp.asarray([
  5987. [3.118993630946, 2.275985934063, 1.933995977619, 1.742896469724],
  5988. [1.271693593916, 1.254158641801, 1.238774141609, 1.225164057869]
  5989. ])
  5990. xp_assert_close(gstd_actual, gstd_desired)
  5991. def test_3d_array_axis_2(self, xp):
  5992. x = xp.asarray(self.array_3d)
  5993. gstd_actual = stats.gstd(x, axis=2)
  5994. gstd_desired = xp.asarray([
  5995. [1.8242475707664, 1.2243686572447, 1.1318311657788],
  5996. [1.0934830582351, 1.0724479791887, 1.0591498540749]
  5997. ])
  5998. xp_assert_close(gstd_actual, gstd_desired)
  5999. def test_binomtest():
  6000. # precision tests compared to R for ticket:986
  6001. pp = np.concatenate((np.linspace(0.1, 0.2, 5),
  6002. np.linspace(0.45, 0.65, 5),
  6003. np.linspace(0.85, 0.95, 5)))
  6004. n = 501
  6005. x = 450
  6006. results = [0.0, 0.0, 1.0159969301994141e-304,
  6007. 2.9752418572150531e-275, 7.7668382922535275e-250,
  6008. 2.3381250925167094e-099, 7.8284591587323951e-081,
  6009. 9.9155947819961383e-065, 2.8729390725176308e-050,
  6010. 1.7175066298388421e-037, 0.0021070691951093692,
  6011. 0.12044570587262322, 0.88154763174802508, 0.027120993063129286,
  6012. 2.6102587134694721e-006]
  6013. for p, res in zip(pp, results):
  6014. assert_approx_equal(stats.binomtest(x, n, p).pvalue, res,
  6015. significant=12, err_msg=f'fail forp={p}')
  6016. assert_approx_equal(stats.binomtest(50, 100, 0.1).pvalue,
  6017. 5.8320387857343647e-024,
  6018. significant=12)
  6019. def test_binomtest2():
  6020. # test added for issue #2384
  6021. res2 = [
  6022. [1.0, 1.0],
  6023. [0.5, 1.0, 0.5],
  6024. [0.25, 1.00, 1.00, 0.25],
  6025. [0.125, 0.625, 1.000, 0.625, 0.125],
  6026. [0.0625, 0.3750, 1.0000, 1.0000, 0.3750, 0.0625],
  6027. [0.03125, 0.21875, 0.68750, 1.00000, 0.68750, 0.21875, 0.03125],
  6028. [0.015625, 0.125000, 0.453125, 1.000000, 1.000000, 0.453125, 0.125000,
  6029. 0.015625],
  6030. [0.0078125, 0.0703125, 0.2890625, 0.7265625, 1.0000000, 0.7265625,
  6031. 0.2890625, 0.0703125, 0.0078125],
  6032. [0.00390625, 0.03906250, 0.17968750, 0.50781250, 1.00000000,
  6033. 1.00000000, 0.50781250, 0.17968750, 0.03906250, 0.00390625],
  6034. [0.001953125, 0.021484375, 0.109375000, 0.343750000, 0.753906250,
  6035. 1.000000000, 0.753906250, 0.343750000, 0.109375000, 0.021484375,
  6036. 0.001953125]
  6037. ]
  6038. for k in range(1, 11):
  6039. res1 = [stats.binomtest(v, k, 0.5).pvalue for v in range(k + 1)]
  6040. assert_almost_equal(res1, res2[k-1], decimal=10)
  6041. def test_binomtest3():
  6042. # test added for issue #2384
  6043. # test when x == n*p and neighbors
  6044. res3 = [stats.binomtest(v, v*k, 1./k).pvalue
  6045. for v in range(1, 11) for k in range(2, 11)]
  6046. assert_equal(res3, np.ones(len(res3), int))
  6047. # > bt=c()
  6048. # > for(i in as.single(1:10)) {
  6049. # + for(k in as.single(2:10)) {
  6050. # + bt = c(bt, binom.test(i-1, k*i,(1/k))$p.value);
  6051. # + print(c(i+1, k*i,(1/k)))
  6052. # + }
  6053. # + }
  6054. binom_testm1 = np.array([
  6055. 0.5, 0.5555555555555556, 0.578125, 0.5904000000000003,
  6056. 0.5981224279835393, 0.603430543396034, 0.607304096221924,
  6057. 0.610255656871054, 0.612579511000001, 0.625, 0.670781893004115,
  6058. 0.68853759765625, 0.6980101120000006, 0.703906431368616,
  6059. 0.70793209416498, 0.7108561134173507, 0.713076544331419,
  6060. 0.714820192935702, 0.6875, 0.7268709038256367, 0.7418963909149174,
  6061. 0.74986110468096, 0.7548015520398076, 0.7581671424768577,
  6062. 0.760607984787832, 0.762459425024199, 0.7639120677676575, 0.7265625,
  6063. 0.761553963657302, 0.774800934828818, 0.7818005980538996,
  6064. 0.78613491480358, 0.789084353140195, 0.7912217659828884,
  6065. 0.79284214559524, 0.794112956558801, 0.75390625, 0.7856929451142176,
  6066. 0.7976688481430754, 0.8039848974727624, 0.807891868948366,
  6067. 0.8105487660137676, 0.812473307174702, 0.8139318233591120,
  6068. 0.815075399104785, 0.7744140625, 0.8037322594985427,
  6069. 0.814742863657656, 0.8205425178645808, 0.8241275984172285,
  6070. 0.8265645374416, 0.8283292196088257, 0.829666291102775,
  6071. 0.8307144686362666, 0.7905273437499996, 0.8178712053954738,
  6072. 0.828116983756619, 0.833508948940494, 0.8368403871552892,
  6073. 0.839104213210105, 0.840743186196171, 0.84198481438049,
  6074. 0.8429580531563676, 0.803619384765625, 0.829338573944648,
  6075. 0.8389591907548646, 0.84401876783902, 0.84714369697889,
  6076. 0.8492667010581667, 0.850803474598719, 0.851967542858308,
  6077. 0.8528799045949524, 0.8145294189453126, 0.838881732845347,
  6078. 0.847979024541911, 0.852760894015685, 0.8557134656773457,
  6079. 0.8577190131799202, 0.85917058278431, 0.860270010472127,
  6080. 0.861131648404582, 0.823802947998047, 0.846984756807511,
  6081. 0.855635653643743, 0.860180994825685, 0.86298688573253,
  6082. 0.864892525675245, 0.866271647085603, 0.867316125625004,
  6083. 0.8681346531755114
  6084. ])
  6085. # > bt=c()
  6086. # > for(i in as.single(1:10)) {
  6087. # + for(k in as.single(2:10)) {
  6088. # + bt = c(bt, binom.test(i+1, k*i,(1/k))$p.value);
  6089. # + print(c(i+1, k*i,(1/k)))
  6090. # + }
  6091. # + }
  6092. binom_testp1 = np.array([
  6093. 0.5, 0.259259259259259, 0.26171875, 0.26272, 0.2632244513031551,
  6094. 0.2635138663069203, 0.2636951804161073, 0.2638162407564354,
  6095. 0.2639010709000002, 0.625, 0.4074074074074074, 0.42156982421875,
  6096. 0.4295746560000003, 0.43473045988554, 0.4383309503172684,
  6097. 0.4409884859402103, 0.4430309389962837, 0.444649849401104, 0.6875,
  6098. 0.4927602499618962, 0.5096031427383425, 0.5189636628480,
  6099. 0.5249280070771274, 0.5290623300865124, 0.5320974248125793,
  6100. 0.5344204730474308, 0.536255847400756, 0.7265625, 0.5496019313526808,
  6101. 0.5669248746708034, 0.576436455045805, 0.5824538812831795,
  6102. 0.5866053321547824, 0.589642781414643, 0.5919618019300193,
  6103. 0.593790427805202, 0.75390625, 0.590868349763505, 0.607983393277209,
  6104. 0.617303847446822, 0.623172512167948, 0.627208862156123,
  6105. 0.6301556891501057, 0.632401894928977, 0.6341708982290303,
  6106. 0.7744140625, 0.622562037497196, 0.639236102912278, 0.648263335014579,
  6107. 0.65392850011132, 0.657816519817211, 0.660650782947676,
  6108. 0.662808780346311, 0.6645068560246006, 0.7905273437499996,
  6109. 0.6478843304312477, 0.6640468318879372, 0.6727589686071775,
  6110. 0.6782129857784873, 0.681950188903695, 0.684671508668418,
  6111. 0.686741824999918, 0.688369886732168, 0.803619384765625,
  6112. 0.668716055304315, 0.684360013879534, 0.6927642396829181,
  6113. 0.6980155964704895, 0.701609591890657, 0.7042244320992127,
  6114. 0.7062125081341817, 0.707775152962577, 0.8145294189453126,
  6115. 0.686243374488305, 0.7013873696358975, 0.709501223328243,
  6116. 0.714563595144314, 0.718024953392931, 0.7205416252126137,
  6117. 0.722454130389843, 0.723956813292035, 0.823802947998047,
  6118. 0.701255953767043, 0.715928221686075, 0.723772209289768,
  6119. 0.7286603031173616, 0.7319999279787631, 0.7344267920995765,
  6120. 0.736270323773157, 0.737718376096348
  6121. ])
  6122. res4_p1 = [stats.binomtest(v+1, v*k, 1./k).pvalue
  6123. for v in range(1, 11) for k in range(2, 11)]
  6124. res4_m1 = [stats.binomtest(v-1, v*k, 1./k).pvalue
  6125. for v in range(1, 11) for k in range(2, 11)]
  6126. assert_almost_equal(res4_p1, binom_testp1, decimal=13)
  6127. assert_almost_equal(res4_m1, binom_testm1, decimal=13)
  6128. class TestTrim:
  6129. # test trim functions
  6130. def test_trim1(self):
  6131. a = np.arange(11)
  6132. assert_equal(np.sort(stats.trim1(a, 0.1)), np.arange(10))
  6133. assert_equal(np.sort(stats.trim1(a, 0.2)), np.arange(9))
  6134. assert_equal(np.sort(stats.trim1(a, 0.2, tail='left')),
  6135. np.arange(2, 11))
  6136. assert_equal(np.sort(stats.trim1(a, 3/11., tail='left')),
  6137. np.arange(3, 11))
  6138. assert_equal(stats.trim1(a, 1.0), [])
  6139. assert_equal(stats.trim1(a, 1.0, tail='left'), [])
  6140. # empty input
  6141. assert_equal(stats.trim1([], 0.1), [])
  6142. assert_equal(stats.trim1([], 3/11., tail='left'), [])
  6143. assert_equal(stats.trim1([], 4/6.), [])
  6144. # test axis
  6145. a = np.arange(24).reshape(6, 4)
  6146. ref = np.arange(4, 24).reshape(5, 4) # first row trimmed
  6147. axis = 0
  6148. trimmed = stats.trim1(a, 0.2, tail='left', axis=axis)
  6149. assert_equal(np.sort(trimmed, axis=axis), ref)
  6150. axis = 1
  6151. trimmed = stats.trim1(a.T, 0.2, tail='left', axis=axis)
  6152. assert_equal(np.sort(trimmed, axis=axis), ref.T)
  6153. def test_trimboth(self):
  6154. a = np.arange(11)
  6155. assert_equal(np.sort(stats.trimboth(a, 3/11.)), np.arange(3, 8))
  6156. assert_equal(np.sort(stats.trimboth(a, 0.2)),
  6157. np.array([2, 3, 4, 5, 6, 7, 8]))
  6158. assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(6, 4), 0.2)),
  6159. np.arange(4, 20).reshape(4, 4))
  6160. assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(4, 6).T,
  6161. 2/6.)),
  6162. np.array([[2, 8, 14, 20], [3, 9, 15, 21]]))
  6163. assert_raises(ValueError, stats.trimboth,
  6164. np.arange(24).reshape(4, 6).T, 4/6.)
  6165. # empty input
  6166. assert_equal(stats.trimboth([], 0.1), [])
  6167. assert_equal(stats.trimboth([], 3/11.), [])
  6168. assert_equal(stats.trimboth([], 4/6.), [])
  6169. @make_xp_test_case(stats.trim_mean)
  6170. class TestTrimMean:
  6171. def test_trim_mean(self, xp):
  6172. # don't use pre-sorted arrays
  6173. idx = np.array([3, 5, 0, 1, 2, 4])
  6174. a2 = np.arange(24).reshape(6, 4)[idx, :]
  6175. a3 = np.arange(24).reshape(6, 4, order='F')[idx, :]
  6176. xp_assert_equal(stats.trim_mean(xp.asarray(a3), 2/6.),
  6177. xp.asarray([2.5, 8.5, 14.5, 20.5]))
  6178. xp_assert_equal(stats.trim_mean(xp.asarray(a2), 2/6.),
  6179. xp.asarray([10., 11., 12., 13.]))
  6180. idx4 = np.array([1, 0, 3, 2])
  6181. a4 = np.arange(24).reshape(4, 6)[idx4, :]
  6182. xp_assert_equal(stats.trim_mean(xp.asarray(a4), 2/6.),
  6183. xp.asarray([9., 10., 11., 12., 13., 14.]))
  6184. # shuffled arange(24) as array_like
  6185. a = [7, 11, 12, 21, 16, 6, 22, 1, 5, 0, 18, 10, 17, 9, 19, 15, 23,
  6186. 20, 2, 14, 4, 13, 8, 3]
  6187. xp_assert_equal(stats.trim_mean(xp.asarray(a), 2/6.), xp.asarray(11.5))
  6188. xp_assert_equal(stats.trim_mean(xp.asarray([5, 4, 3, 1, 2, 0]), 2/6.),
  6189. xp.asarray(2.5))
  6190. # check axis argument
  6191. rng = np.random.default_rng(3417115752)
  6192. a = rng.integers(20, size=(5, 6, 4, 7))
  6193. a = xp.asarray(a)
  6194. for axis in [0, 1, 2, 3, -1]:
  6195. res1 = stats.trim_mean(a, 2/6., axis=axis)
  6196. res2 = stats.trim_mean(xp.moveaxis(a, axis, 0), 2/6.)
  6197. xp_assert_equal(res1, res2)
  6198. res1 = stats.trim_mean(a, 2/6., axis=None)
  6199. res2 = stats.trim_mean(xp_ravel(a), 2/6.)
  6200. xp_assert_equal(res1, res2)
  6201. with pytest.raises(ValueError, match="Proportion too big."):
  6202. stats.trim_mean(a, 0.6)
  6203. @pytest.mark.skip_xp_backends('jax.numpy', reason="lazy -> no _axis_nan_policy")
  6204. @pytest.mark.skip_xp_backends('dask.array', reason="lazy -> no _axis_nan_policy")
  6205. def test_empty_input(self, xp):
  6206. # empty input
  6207. with pytest.warns(SmallSampleWarning, match='too small'):
  6208. xp_assert_equal(stats.trim_mean(xp.asarray([]), 0.0), xp.asarray(xp.nan))
  6209. with pytest.warns(SmallSampleWarning, match='too small'):
  6210. xp_assert_equal(stats.trim_mean(xp.asarray([]), 0.6), xp.asarray(xp.nan))
  6211. @make_xp_test_case(stats.sigmaclip)
  6212. class TestSigmaClip:
  6213. def test_sigmaclip1(self, xp):
  6214. a = xp.concat((xp.linspace(9.5, 10.5, 31),
  6215. xp.linspace(0., 20., 5)))
  6216. fact = 4 # default
  6217. c, low, upp = stats.sigmaclip(a)
  6218. assert xp.min(c) > low
  6219. assert xp.max(c) < upp
  6220. xp_assert_equal(low, xp.mean(c) - fact*xp.std(c))
  6221. xp_assert_equal(upp, xp.mean(c) + fact*xp.std(c))
  6222. assert c.shape == a.shape
  6223. def test_sigmaclip2(self, xp):
  6224. a = xp.concat((xp.linspace(9.5, 10.5, 31),
  6225. xp.linspace(0., 20., 5)))
  6226. fact = 1.5
  6227. c, low, upp = stats.sigmaclip(a, fact, fact)
  6228. assert xp.min(c) > low
  6229. assert xp.max(c) < upp
  6230. xp_assert_equal(low, xp.mean(c) - fact*xp.std(c))
  6231. xp_assert_equal(upp, xp.mean(c) + fact*xp.std(c))
  6232. assert c.shape == (4,)
  6233. assert a.shape == (36,)
  6234. def test_sigmaclip3(self, xp):
  6235. a = xp.concat((xp.linspace(9.5, 10.5, 11),
  6236. xp.linspace(-100., -50., 3)))
  6237. fact = 1.8
  6238. c, low, upp = stats.sigmaclip(a, fact, fact)
  6239. assert xp.min(c) > low
  6240. assert xp.max(c) < upp
  6241. xp_assert_equal(low, xp.mean(c) - fact*xp.std(c))
  6242. xp_assert_equal(upp, xp.mean(c) + fact*xp.std(c))
  6243. xp_assert_equal(c, xp.linspace(9.5, 10.5, 11))
  6244. def test_sigmaclip_result_attributes(self, xp):
  6245. a = xp.concat((xp.linspace(9.5, 10.5, 11),
  6246. xp.linspace(-100., -50., 3)))
  6247. fact = 1.8
  6248. res = stats.sigmaclip(a, fact, fact)
  6249. attributes = ('clipped', 'lower', 'upper')
  6250. check_named_results(res, attributes, xp=xp)
  6251. def test_std_zero(self, xp):
  6252. # regression test #8632
  6253. x = xp.ones(10)
  6254. xp_assert_equal(stats.sigmaclip(x)[0], x)
  6255. @make_xp_test_case(stats.alexandergovern)
  6256. class TestAlexanderGovern:
  6257. def test_compare_dtypes(self):
  6258. # leave this NumPy only
  6259. args = [[13, 13, 13, 13, 13, 13, 13, 12, 12],
  6260. [14, 13, 12, 12, 12, 12, 12, 11, 11],
  6261. [14, 14, 13, 13, 13, 13, 13, 12, 12],
  6262. [15, 14, 13, 13, 13, 12, 12, 12, 11]]
  6263. args_int16 = [np.asarray(arg, dtype=np.int16) for arg in args]
  6264. args_int32 = [np.asarray(arg, dtype=np.int32) for arg in args]
  6265. args_uint8 = [np.asarray(arg, dtype=np.uint8) for arg in args]
  6266. args_float64 = [np.asarray(arg, dtype=np.float64) for arg in args]
  6267. res_int16 = stats.alexandergovern(*args_int16)
  6268. res_int32 = stats.alexandergovern(*args_int32)
  6269. res_uint8 = stats.alexandergovern(*args_uint8)
  6270. res_float64 = stats.alexandergovern(*args_float64)
  6271. assert (res_int16.pvalue == res_int32.pvalue ==
  6272. res_uint8.pvalue == res_float64.pvalue)
  6273. assert (res_int16.statistic == res_int32.statistic ==
  6274. res_uint8.statistic == res_float64.statistic)
  6275. @skip_xp_backends('jax.numpy', reason="Requires `_axis_nan_policy` decorator")
  6276. @skip_xp_backends('dask.array', reason="Requires `_axis_nan_policy` decorator")
  6277. @pytest.mark.parametrize('case',[([1, 2], []), ([1, 2], 2), ([1, 2], [2])])
  6278. def test_too_small_inputs(self, case, xp):
  6279. # input array is of size zero or too small
  6280. dtype = xp_default_dtype(xp)
  6281. case = xp.asarray(case[0], dtype=dtype), xp.asarray(case[1], dtype=dtype)
  6282. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  6283. res = stats.alexandergovern(*case)
  6284. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6285. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6286. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning")
  6287. def test_bad_inputs(self, xp):
  6288. # inputs are not finite (infinity)
  6289. samples = xp.asarray([1., 2.]), xp.asarray([xp.inf, xp.inf])
  6290. res = stats.alexandergovern(*samples)
  6291. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6292. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6293. def test_compare_r(self, xp):
  6294. '''
  6295. Data generated in R with
  6296. > set.seed(1)
  6297. > library("onewaytests")
  6298. > library("tibble")
  6299. > y <- c(rnorm(40, sd=10),
  6300. + rnorm(30, sd=15),
  6301. + rnorm(20, sd=20))
  6302. > x <- c(rep("one", times=40),
  6303. + rep("two", times=30),
  6304. + rep("eight", times=20))
  6305. > x <- factor(x)
  6306. > ag.test(y ~ x, tibble(y,x))
  6307. Alexander-Govern Test (alpha = 0.05)
  6308. -------------------------------------------------------------
  6309. data : y and x
  6310. statistic : 1.359941
  6311. parameter : 2
  6312. p.value : 0.5066321
  6313. Result : Difference is not statistically significant.
  6314. -------------------------------------------------------------
  6315. Example adapted from:
  6316. https://eval-serv2.metpsy.uni-jena.de/wiki-metheval-hp/index.php/R_FUN_Alexander-Govern
  6317. '''
  6318. one = [-6.264538107423324, 1.8364332422208225, -8.356286124100471,
  6319. 15.952808021377916, 3.295077718153605, -8.204683841180152,
  6320. 4.874290524284853, 7.383247051292173, 5.757813516534923,
  6321. -3.0538838715635603, 15.11781168450848, 3.898432364114311,
  6322. -6.2124058054180376, -22.146998871774997, 11.249309181431082,
  6323. -0.4493360901523085, -0.16190263098946087, 9.438362106852992,
  6324. 8.212211950980885, 5.939013212175088, 9.189773716082183,
  6325. 7.821363007310671, 0.745649833651906, -19.89351695863373,
  6326. 6.198257478947102, -0.5612873952900078, -1.557955067053293,
  6327. -14.707523838992744, -4.781500551086204, 4.179415601997024,
  6328. 13.58679551529044, -1.0278772734299553, 3.876716115593691,
  6329. -0.5380504058290512, -13.770595568286065, -4.149945632996798,
  6330. -3.942899537103493, -0.5931339671118566, 11.000253719838831,
  6331. 7.631757484575442]
  6332. two = [-2.4678539438038034, -3.8004252020476135, 10.454450631071062,
  6333. 8.34994798010486, -10.331335418242798, -10.612427354431794,
  6334. 5.468729432052455, 11.527993867731237, -1.6851931822534207,
  6335. 13.216615896813222, 5.971588205506021, -9.180395898761569,
  6336. 5.116795371366372, -16.94044644121189, 21.495355525515556,
  6337. 29.7059984775879, -5.508322146997636, -15.662019394747961,
  6338. 8.545794411636193, -2.0258190582123654, 36.024266407571645,
  6339. -0.5886000409975387, 10.346090436761651, 0.4200323817099909,
  6340. -11.14909813323608, 2.8318844927151434, -27.074379433365568,
  6341. 21.98332292344329, 2.2988000731784655, 32.58917505543229]
  6342. eight = [9.510190577993251, -14.198928618436291, 12.214527069781099,
  6343. -18.68195263288503, -25.07266800478204, 5.828924710349257,
  6344. -8.86583746436866, 0.02210703263248262, 1.4868264830332811,
  6345. -11.79041892376144, -11.37337465637004, -2.7035723024766414,
  6346. 23.56173993146409, -30.47133600859524, 11.878923752568431,
  6347. 6.659007424270365, 21.261996745527256, -6.083678472686013,
  6348. 7.400376198325763, 5.341975815444621]
  6349. one, two, eight = xp.asarray(one), xp.asarray(two), xp.asarray(eight)
  6350. soln = stats.alexandergovern(one, two, eight)
  6351. xp_assert_close(soln.statistic, xp.asarray(1.3599405447999450836))
  6352. xp_assert_close(soln.pvalue, xp.asarray(0.50663205309676440091))
  6353. def test_compare_scholar(self, xp):
  6354. '''
  6355. Data taken from 'The Modification and Evaluation of the
  6356. Alexander-Govern Test in Terms of Power' by Kingsley Ochuko, T.,
  6357. Abdullah, S., Binti Zain, Z., & Soaad Syed Yahaya, S. (2015).
  6358. '''
  6359. young = [482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62,
  6360. 518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1, 584.68,
  6361. 609.09, 609.53, 666.63, 676.4]
  6362. middle = [335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85,
  6363. 487.3, 493.08, 494.31, 499.1, 886.41]
  6364. old = [519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24, 558.61,
  6365. 558.95, 565.43, 586.39, 594.69, 629.22, 645.69, 691.84]
  6366. young, middle, old = xp.asarray(young), xp.asarray(middle), xp.asarray(old)
  6367. soln = stats.alexandergovern(young, middle, old)
  6368. xp_assert_close(soln.statistic, xp.asarray(5.3237), atol=1e-3)
  6369. xp_assert_close(soln.pvalue, xp.asarray(0.06982), atol=1e-4)
  6370. # verify with ag.test in r
  6371. '''
  6372. > library("onewaytests")
  6373. > library("tibble")
  6374. > young <- c(482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62,
  6375. + 518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1,
  6376. + 584.68, 609.09, 609.53, 666.63, 676.4)
  6377. > middle <- c(335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85,
  6378. + 487.3, 493.08, 494.31, 499.1, 886.41)
  6379. > old <- c(519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24,
  6380. + 558.61, 558.95, 565.43, 586.39, 594.69, 629.22,
  6381. + 645.69, 691.84)
  6382. > young_fct <- c(rep("young", times=19))
  6383. > middle_fct <-c(rep("middle", times=12))
  6384. > old_fct <- c(rep("old", times=15))
  6385. > ag.test(a ~ b, tibble(a=c(young, middle, old), b=factor(c(young_fct,
  6386. + middle_fct, old_fct))))
  6387. Alexander-Govern Test (alpha = 0.05)
  6388. -------------------------------------------------------------
  6389. data : a and b
  6390. statistic : 5.324629
  6391. parameter : 2
  6392. p.value : 0.06978651
  6393. Result : Difference is not statistically significant.
  6394. -------------------------------------------------------------
  6395. '''
  6396. xp_assert_close(soln.statistic, xp.asarray(5.324629), rtol=2e-6)
  6397. xp_assert_close(soln.pvalue, xp.asarray(0.06978651), rtol=2e-6)
  6398. def test_compare_scholar3(self, xp):
  6399. '''
  6400. Data taken from 'Robustness And Comparative Power Of WelchAspin,
  6401. Alexander-Govern And Yuen Tests Under Non-Normality And Variance
  6402. Heteroscedasticity', by Ayed A. Almoied. 2017. Page 34-37.
  6403. https://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2775&context=oa_dissertations
  6404. '''
  6405. x1 = [-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152,
  6406. 0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555,
  6407. 0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442,
  6408. 0.999554, 1.642958]
  6409. x2 = [-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253,
  6410. -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824,
  6411. 0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475,
  6412. 1.33964, 1.576766]
  6413. x1, x2 = xp.asarray(x1), xp.asarray(x2)
  6414. soln = stats.alexandergovern(x1, x2)
  6415. xp_assert_close(soln.statistic, xp.asarray(0.713526), atol=1e-5)
  6416. xp_assert_close(soln.pvalue, xp.asarray(0.398276), atol=1e-5)
  6417. '''
  6418. tested in ag.test in R:
  6419. > library("onewaytests")
  6420. > library("tibble")
  6421. > x1 <- c(-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152,
  6422. + 0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555,
  6423. + 0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442,
  6424. + 0.999554, 1.642958)
  6425. > x2 <- c(-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253,
  6426. + -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824,
  6427. + 0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475,
  6428. + 1.33964, 1.576766)
  6429. > x1_fact <- c(rep("x1", times=20))
  6430. > x2_fact <- c(rep("x2", times=20))
  6431. > a <- c(x1, x2)
  6432. > b <- factor(c(x1_fact, x2_fact))
  6433. > ag.test(a ~ b, tibble(a, b))
  6434. Alexander-Govern Test (alpha = 0.05)
  6435. -------------------------------------------------------------
  6436. data : a and b
  6437. statistic : 0.7135182
  6438. parameter : 1
  6439. p.value : 0.3982783
  6440. Result : Difference is not statistically significant.
  6441. -------------------------------------------------------------
  6442. '''
  6443. xp_assert_close(soln.statistic, xp.asarray(0.7135182), rtol=2e-6)
  6444. xp_assert_close(soln.pvalue, xp.asarray(0.3982783), rtol=2e-6)
  6445. def test_nan_policy_propagate(self, xp):
  6446. args = xp.asarray([1., 2., 3., 4.]), xp.asarray([1, xp.nan])
  6447. # default nan_policy is 'propagate'
  6448. res = stats.alexandergovern(*args)
  6449. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6450. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6451. @skip_xp_backends('jax.numpy', reason="Requires `_axis_nan_policy` decorator")
  6452. @skip_xp_backends('dask.array', reason="Requires `_axis_nan_policy` decorator")
  6453. def test_nan_policy_raise(self, xp):
  6454. args = xp.asarray([1., 2., 3., 4.]), xp.asarray([1., xp.nan])
  6455. with assert_raises(ValueError, match="The input contains nan values"):
  6456. stats.alexandergovern(*args, nan_policy='raise')
  6457. @skip_xp_backends('jax.numpy', reason="Requires `_axis_nan_policy` decorator")
  6458. @skip_xp_backends('dask.array', reason="Requires `_axis_nan_policy` decorator")
  6459. def test_nan_policy_omit(self, xp):
  6460. args_nan = xp.asarray([1, 2, 3, xp.nan, 4]), xp.asarray([1, xp.nan, 19, 25])
  6461. args_no_nan = xp.asarray([1, 2, 3, 4]), xp.asarray([1, 19, 25])
  6462. res_nan = stats.alexandergovern(*args_nan, nan_policy='omit')
  6463. res_no_nan = stats.alexandergovern(*args_no_nan)
  6464. xp_assert_equal(res_nan.pvalue, res_no_nan.pvalue)
  6465. xp_assert_equal(res_nan.statistic, res_no_nan.statistic)
  6466. @skip_xp_backends('jax.numpy', reason="Requires `_axis_nan_policy` decorator")
  6467. @skip_xp_backends('dask.array', reason="Requires `_axis_nan_policy` decorator")
  6468. def test_constant_input(self, xp):
  6469. # Zero variance input, consistent with `stats.pearsonr`
  6470. x1 = xp.asarray([0.667, 0.667, 0.667])
  6471. x2 = xp.asarray([0.123, 0.456, 0.789])
  6472. with pytest.warns(RuntimeWarning, match="Precision loss occurred..."):
  6473. res = stats.alexandergovern(x1, x2)
  6474. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6475. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6476. @skip_xp_backends('jax.numpy', reason="Requires `_axis_nan_policy` decorator")
  6477. @skip_xp_backends('dask.array', reason="Requires `_axis_nan_policy` decorator")
  6478. @pytest.mark.parametrize('axis', [0, 1, None])
  6479. def test_2d_input(self, xp, axis):
  6480. # much more extensive testing for NumPy in `test_axis_nan_policy.py`
  6481. rng = np.random.default_rng(23498389754392854235)
  6482. shape = (11, 12)
  6483. samples = [rng.random(shape) for i in range(3)]
  6484. xp_samples = [xp.asarray(sample) for sample in samples]
  6485. ref = stats.alexandergovern(*samples, axis=axis)
  6486. res = stats.alexandergovern(*xp_samples, axis=axis)
  6487. xp_assert_close(res.statistic, xp.asarray(ref.statistic))
  6488. xp_assert_close(res.pvalue, xp.asarray(ref.pvalue))
  6489. @make_xp_test_case(stats.f_oneway)
  6490. class TestFOneWay:
  6491. def test_trivial(self, xp):
  6492. # A trivial test of stats.f_oneway, with F=0.
  6493. F, p = stats.f_oneway(xp.asarray([0, 2]), xp.asarray([0, 2]))
  6494. xp_assert_equal(F, xp.asarray(0.0))
  6495. xp_assert_equal(p, xp.asarray(1.0))
  6496. @pytest.mark.parametrize("dtype", [None, "float32", "float64"])
  6497. def test_basic(self, dtype, xp):
  6498. # Despite being a floating point calculation, this data should
  6499. # result in F being exactly 2.0.
  6500. if is_numpy(xp) and xp.__version__ < "2.0" and dtype=='float32':
  6501. pytest.skip("NumPy doesn't preserve dtype pre-NEP 50.")
  6502. dtype = None if dtype is None else getattr(xp, dtype)
  6503. F, p = stats.f_oneway(xp.asarray([0, 2], dtype=dtype),
  6504. xp.asarray([2, 4], dtype=dtype))
  6505. xp_assert_equal(F, xp.asarray(2.0, dtype=dtype))
  6506. xp_assert_close(p, xp.asarray(1 - 0.5**0.5, dtype=dtype))
  6507. @pytest.mark.parametrize("dtype", [None, "float32", "float64"])
  6508. def test_unequal_var(self, dtype, xp):
  6509. # toy samples with unequal variances and different observations
  6510. if is_numpy(xp) and xp.__version__ < "2.0" and dtype=='float32':
  6511. pytest.skip("NumPy doesn't preserve dtype pre-NEP 50.")
  6512. dtype = None if dtype is None else getattr(xp, dtype)
  6513. samples = [xp.asarray([-50.42, 40.31, -18.09, 35.58, -6.8, 0.22], dtype=dtype),
  6514. xp.asarray([23.44, 4.5, 15.1, 9.66], dtype=dtype),
  6515. xp.asarray([11.94, 11.1 , 9.87, 9.09, 3.33], dtype=dtype)]
  6516. F, p = stats.f_oneway(*samples, equal_var=False)
  6517. # R language as benchmark
  6518. # group1 <- c(-50.42, 40.31, -18.09, 35.58, -6.8, 0.22)
  6519. # group2 <- c(23.44, 4.5, 15.1, 9.66)
  6520. # group3 <- c(11.94, 11.1 , 9.87, 9.09, 3.33)
  6521. #
  6522. # data <- data.frame(
  6523. # value = c(group1, group2, group3),
  6524. # group = factor(c(rep("G1", length(group1)),
  6525. # rep("G2", length(group2)),
  6526. # rep("G3", length(group3))))
  6527. # )
  6528. # welch_anova <- oneway.test(value ~ group, data = data, var.equal = FALSE)
  6529. # welch_anova$statistic
  6530. ## F: 0.609740409019517
  6531. # welch_anova$p.value
  6532. ## 0.574838941286302
  6533. xp_assert_close(F, xp.asarray(0.609740409019517, dtype=dtype))
  6534. xp_assert_close(p, xp.asarray(0.574838941286302, dtype=dtype))
  6535. def test_equal_var_input_validation(self, xp):
  6536. samples = [xp.asarray([-50.42, 40.31, -18.09, 35.58, -6.8, 0.22]),
  6537. xp.asarray([23.44, 4.5, 15.1, 9.66]),
  6538. xp.asarray([11.94, 11.1 , 9.87, 9.09, 3.33])]
  6539. message = "Expected a boolean value for 'equal_var'"
  6540. with pytest.raises(TypeError, match=message):
  6541. stats.f_oneway(*samples, equal_var="False")
  6542. def test_known_exact(self, xp):
  6543. # Another trivial dataset for which the exact F and p can be
  6544. # calculated on most platforms
  6545. F, p = stats.f_oneway(xp.asarray([2]), xp.asarray([2]),
  6546. xp.asarray([2, 3, 4]))
  6547. xp_assert_close(F, xp.asarray(3/5)) # assert_equal fails on some CI platforms
  6548. xp_assert_close(p, xp.asarray(5/8))
  6549. def test_large_integer_array(self, xp):
  6550. a = xp.asarray([655, 788], dtype=xp.uint16)
  6551. b = xp.asarray([789, 772], dtype=xp.uint16)
  6552. F, p = stats.f_oneway(a, b)
  6553. # The expected value was verified by computing it with mpmath with
  6554. # 40 digits of precision.
  6555. xp_assert_close(F, xp.asarray(0.77450216931805540))
  6556. def test_result_attributes(self, xp):
  6557. a = xp.asarray([655, 788], dtype=xp.uint16)
  6558. b = xp.asarray([789, 772], dtype=xp.uint16)
  6559. res = stats.f_oneway(a, b)
  6560. attributes = ('statistic', 'pvalue')
  6561. check_named_results(res, attributes, xp=xp)
  6562. @pytest.mark.parametrize('test_case',
  6563. ['SiRstv', 'SmLs01', 'SmLs02', 'SmLs03', 'AtmWtAg', 'SmLs04', 'SmLs05',
  6564. 'SmLs06', 'SmLs07', 'SmLs08', 'SmLs09'])
  6565. def test_nist(self, test_case, xp):
  6566. # These are the nist ANOVA files. They can be found at:
  6567. # https://www.itl.nist.gov/div898/strd/anova/anova.html
  6568. filename = test_case + ".dat"
  6569. rtol = 1e-7
  6570. fname = os.path.abspath(os.path.join(os.path.dirname(__file__),
  6571. 'data/nist_anova', filename))
  6572. with open(fname) as f:
  6573. content = f.read().split('\n')
  6574. certified = [line.split() for line in content[40:48]
  6575. if line.strip()]
  6576. dataf = np.loadtxt(fname, skiprows=60)
  6577. y, x = dataf.T
  6578. y = y.astype(int)
  6579. caty = np.unique(y)
  6580. f = float(certified[0][-1])
  6581. xlist = [xp.asarray(x[y == i]) for i in caty]
  6582. res = stats.f_oneway(*xlist)
  6583. # With the hard test cases we relax the tolerance a bit.
  6584. hard_tc = ('SmLs07', 'SmLs08', 'SmLs09')
  6585. if test_case in hard_tc:
  6586. rtol = 1e-4
  6587. xp_assert_close(res[0], xp.asarray(f, dtype=xp.float64), rtol=rtol)
  6588. @pytest.mark.filterwarnings('ignore') # Dask emits NumPy warnings
  6589. @pytest.mark.parametrize("a, b, expected", [
  6590. (np.array([42, 42, 42]), np.array([7, 7, 7]), (np.inf, 0.)),
  6591. (np.array([42, 42, 42]), np.array([42, 42, 42]), (np.nan, np.nan))
  6592. ])
  6593. def test_constant_input(self, a, b, expected, xp):
  6594. # For more details, look on https://github.com/scipy/scipy/issues/11669
  6595. f, p = stats.f_oneway(xp.asarray(a), xp.asarray(b))
  6596. xp_assert_equal(f, xp.asarray(expected[0]))
  6597. xp_assert_equal(p, xp.asarray(expected[1]))
  6598. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  6599. @pytest.mark.skip_xp_backends('dask.array', reason='lazy -> no _axis_nan_policy')
  6600. @pytest.mark.parametrize('axis', [-2, -1, 0, 1])
  6601. def test_2d_inputs(self, axis, xp):
  6602. a = np.array([[1, 4, 3, 3],
  6603. [2, 5, 3, 3],
  6604. [3, 6, 3, 3],
  6605. [2, 3, 3, 3],
  6606. [1, 4, 3, 3]], dtype=np.float64)
  6607. b = np.array([[3, 1, 5, 3],
  6608. [4, 6, 5, 3],
  6609. [4, 3, 5, 3],
  6610. [1, 5, 5, 3],
  6611. [5, 5, 5, 3],
  6612. [2, 3, 5, 3],
  6613. [8, 2, 5, 3],
  6614. [2, 2, 5, 3]], dtype=np.float64)
  6615. c = np.array([[4, 3, 4, 3],
  6616. [4, 2, 4, 3],
  6617. [5, 4, 4, 3],
  6618. [5, 4, 4, 3]], dtype=np.float64)
  6619. if axis in [-1, 1]:
  6620. a = a.T
  6621. b = b.T
  6622. c = c.T
  6623. take_axis = 0
  6624. else:
  6625. take_axis = 1
  6626. f, p = stats.f_oneway(xp.asarray(a), xp.asarray(b), xp.asarray(c),
  6627. axis=axis)
  6628. # Verify that the result computed with the 2d arrays matches
  6629. # the result of calling f_oneway individually on each slice.
  6630. for j in [0, 1]:
  6631. fj, pj = stats.f_oneway(np.take(a, j, take_axis),
  6632. np.take(b, j, take_axis),
  6633. np.take(c, j, take_axis))
  6634. xp_assert_close(f[j], xp.asarray(fj))
  6635. xp_assert_close(p[j], xp.asarray(pj))
  6636. for j in [2, 3]:
  6637. fj, pj = stats.f_oneway(np.take(a, j, take_axis),
  6638. np.take(b, j, take_axis),
  6639. np.take(c, j, take_axis))
  6640. xp_assert_close(f[j], xp.asarray(fj))
  6641. xp_assert_close(p[j], xp.asarray(pj))
  6642. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  6643. @pytest.mark.skip_xp_backends('dask.array', reason='lazy -> no _axis_nan_policy')
  6644. def test_3d_inputs(self, xp):
  6645. # Some 3-d arrays. (There is nothing special about the values.)
  6646. a = xp.reshape(1/xp.arange(1.0, 4*5*7 + 1., dtype=xp.float64), (4, 5, 7))
  6647. b = xp.reshape(2/xp.arange(1.0, 4*8*7 + 1., dtype=xp.float64), (4, 8, 7))
  6648. c = xp.reshape(1/xp.arange(1.0, 4*4*7 + 1., dtype=xp.float64), (4, 4, 7))
  6649. c = xp.cos(c)
  6650. f, p = stats.f_oneway(a, b, c, axis=1)
  6651. assert f.shape == (4, 7)
  6652. assert p.shape == (4, 7)
  6653. for i in range(a.shape[0]):
  6654. for j in range(a.shape[2]):
  6655. fij, pij = stats.f_oneway(a[i, :, j], b[i, :, j], c[i, :, j])
  6656. xp_assert_close(fij, f[i, j])
  6657. xp_assert_close(pij, p[i, j])
  6658. def test_length0_1d_error(self, xp):
  6659. # Require at least one value in each group.
  6660. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  6661. result = stats.f_oneway(xp.asarray([1., 2., 3.]), xp.asarray([]),
  6662. xp.asarray([4., 5., 6., 7.]))
  6663. xp_assert_equal(result.statistic, xp.asarray(xp.nan))
  6664. xp_assert_equal(result.pvalue, xp.asarray(xp.nan))
  6665. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  6666. @pytest.mark.skip_xp_backends('dask.array', reason='lazy -> no _axis_nan_policy')
  6667. def test_length0_2d_error(self, xp):
  6668. with eager_warns(SmallSampleWarning, match=too_small_nd_not_omit, xp=xp):
  6669. ncols = 3
  6670. a = xp.ones((4, ncols))
  6671. b = xp.ones((0, ncols))
  6672. c = xp.ones((5, ncols))
  6673. f, p = stats.f_oneway(a, b, c)
  6674. nans = xp.full((ncols,), fill_value=xp.nan)
  6675. xp_assert_equal(f, nans)
  6676. xp_assert_equal(p, nans)
  6677. def test_all_length_one(self, xp):
  6678. samples = xp.asarray([10]), xp.asarray([11]), xp.asarray([12]), xp.asarray([13])
  6679. with eager_warns(SmallSampleWarning, xp=xp):
  6680. result = stats.f_oneway(*samples)
  6681. xp_assert_equal(result.statistic, xp.asarray(xp.nan))
  6682. xp_assert_equal(result.pvalue, xp.asarray(xp.nan))
  6683. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy->reduced input validation')
  6684. @pytest.mark.skip_xp_backends('dask.array', reason='lazy->reduced input validation')
  6685. @pytest.mark.parametrize('args', [(), ([1, 2, 3],)])
  6686. def test_too_few_inputs(self, args, xp):
  6687. args = [xp.asarray(arg) for arg in args]
  6688. message = "At least two samples are required..."
  6689. with pytest.raises(TypeError, match=message):
  6690. stats.f_oneway(*args)
  6691. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy->reduced input validation')
  6692. @pytest.mark.skip_xp_backends('dask.array', reason='lazy->reduced input validation')
  6693. def test_axis_error(self, xp):
  6694. a = xp.ones((3, 4))
  6695. b = xp.ones((5, 4))
  6696. with pytest.raises(AxisError):
  6697. stats.f_oneway(a, b, axis=2)
  6698. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy->reduced input validation')
  6699. @pytest.mark.skip_xp_backends('dask.array', reason='lazy->reduced input validation')
  6700. def test_bad_shapes(self, xp):
  6701. a = xp.ones((3, 4))
  6702. b = xp.ones((5, 4))
  6703. with pytest.raises(ValueError):
  6704. stats.f_oneway(a, b, axis=1)
  6705. @make_xp_test_case(stats.kruskal)
  6706. class TestKruskal:
  6707. def test_array_like(self):
  6708. h, p = stats.kruskal([1], [2])
  6709. assert_equal(h, 1.0)
  6710. assert_allclose(p, stats.chi2.sf(h, 1))
  6711. def test_simple(self, xp):
  6712. x = xp.asarray([1])
  6713. y = xp.asarray([2])
  6714. h, p = stats.kruskal(x, y)
  6715. xp_assert_close(h, xp.asarray(1.0))
  6716. dtype = xp_default_dtype(xp)
  6717. xp_assert_close(p, xp.asarray(stats.chi2.sf(1, 1), dtype=dtype))
  6718. @pytest.mark.parametrize("dtype", ['float32', 'float64', None])
  6719. def test_basic(self, xp, dtype):
  6720. if dtype == 'float32' and np.__version__ < "2":
  6721. pytest.skip("Scalar dtypes only respected after NEP 50.")
  6722. dtype = xp_default_dtype(xp) if dtype is None else getattr(xp, dtype)
  6723. x = xp.asarray([1, 3, 5, 7, 9], dtype=dtype)
  6724. y = xp.asarray([2, 4, 6, 8, 10], dtype=dtype)
  6725. h, p = stats.kruskal(x, y)
  6726. xp_assert_close(h, xp.asarray(3/11, dtype=dtype))
  6727. xp_assert_close(p, xp.asarray(stats.chi2.sf(3/11, 1), dtype=dtype))
  6728. def test_simple_tie(self, xp):
  6729. x = [1]
  6730. y = [1, 2]
  6731. h, p = stats.kruskal(xp.asarray(x), xp.asarray(y))
  6732. h_uncorr = 1.5**2 + 2*2.25**2 - 12
  6733. corr = 0.75
  6734. expected = xp.asarray(h_uncorr / corr) # 0.5
  6735. # Since the expression is simple and the exact answer is 0.5, it
  6736. # should be safe to use assert_equal().
  6737. xp_assert_equal(h, expected)
  6738. xp_assert_close(p, special.chdtrc(xp.asarray(1.), expected))
  6739. def test_another_tie(self, xp):
  6740. x = [1, 1, 1, 2]
  6741. y = [2, 2, 2, 2]
  6742. h, p = stats.kruskal(xp.asarray(x), xp.asarray(y))
  6743. h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9
  6744. corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
  6745. expected = xp.asarray(h_uncorr / corr)
  6746. xp_assert_close(h, expected)
  6747. xp_assert_close(p, special.chdtrc(xp.asarray(1.), expected))
  6748. def test_three_groups(self, xp):
  6749. # A test of stats.kruskal with three groups, with ties.
  6750. x = [1, 1, 1]
  6751. y = [2, 2, 2]
  6752. z = [2, 2]
  6753. h, p = stats.kruskal(xp.asarray(x), xp.asarray(y), xp.asarray(z))
  6754. h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 # 5.0
  6755. corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
  6756. expected = xp.asarray(h_uncorr / corr) # 7.0
  6757. xp_assert_close(h, expected)
  6758. xp_assert_close(p, special.chdtrc(xp.asarray(2.), expected))
  6759. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  6760. def test_empty(self, xp):
  6761. # A test of stats.kruskal with three groups, with ties.
  6762. x = xp.asarray([1, 1, 1])
  6763. y = xp.asarray([2, 2, 2])
  6764. z = xp.asarray([], dtype=y.dtype)
  6765. with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit):
  6766. res = stats.kruskal(x, y, z)
  6767. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6768. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6769. @pytest.mark.skip_xp_backends('jax.numpy', reason='lazy -> no _axis_nan_policy')
  6770. def test_nan_policy(self, xp):
  6771. x = xp.arange(10.)
  6772. x[9] = xp.nan
  6773. res = stats.kruskal(x, x)
  6774. xp_assert_equal(res.statistic, xp.asarray(xp.nan))
  6775. xp_assert_equal(res.pvalue, xp.asarray(xp.nan))
  6776. res = stats.kruskal(x, x, nan_policy='omit')
  6777. xp_assert_equal(res.statistic, xp.asarray(0.0))
  6778. xp_assert_equal(res.pvalue, xp.asarray(1.0))
  6779. with pytest.raises(ValueError, match='The input contains nan values'):
  6780. stats.kruskal(x, x, nan_policy='raise')
  6781. with pytest.raises(ValueError, match='nan_policy must be one of...'):
  6782. stats.kruskal(x, x, nan_policy='foobar')
  6783. def test_large_samples(self, xp):
  6784. # Test to see if large samples are handled correctly.
  6785. n = 50000
  6786. rng = np.random.default_rng(1808365978)
  6787. x = xp.asarray(rng.standard_normal(n))
  6788. y = xp.asarray(rng.standard_normal(n) + 50)
  6789. h, p = stats.kruskal(x, y)
  6790. xp_assert_close(p, xp.asarray(0., dtype=x.dtype))
  6791. def test_no_args_gh20661(self, xp):
  6792. message = r"Need at least two groups in stats.kruskal\(\)"
  6793. with pytest.raises(ValueError, match=message):
  6794. stats.kruskal()
  6795. @make_xp_test_case(stats.combine_pvalues)
  6796. class TestCombinePvalues:
  6797. # Reference values computed using the following R code:
  6798. # options(digits=16)
  6799. # library(metap)
  6800. # x = c(0.01, 0.2, 0.3)
  6801. # sumlog(x) # fisher
  6802. # sumz(x) # stouffer
  6803. # sumlog(1-x) # pearson (negative statistic and complement of p-value)
  6804. # minimump(x) # tippett
  6805. @pytest.mark.parametrize(
  6806. "method, expected_statistic, expected_pvalue",
  6807. [("fisher", 14.83716180549625 , 0.02156175132483465),
  6808. ("stouffer", 2.131790594240385, 0.01651203260896294),
  6809. ("pearson", -1.179737662212887, 1-0.9778736999143087),
  6810. ("tippett", 0.01, 0.02970100000000002),
  6811. # mudholkar_george: library(transite); p_combine(x, method="MG")
  6812. ("mudholkar_george", 6.828712071641684, 0.01654551838539527)])
  6813. def test_reference_values(self, xp, method, expected_statistic, expected_pvalue):
  6814. x = [.01, .2, .3]
  6815. res = stats.combine_pvalues(xp.asarray(x), method=method)
  6816. xp_assert_close(res.statistic, xp.asarray(expected_statistic))
  6817. xp_assert_close(res.pvalue, xp.asarray(expected_pvalue))
  6818. @pytest.mark.parametrize(
  6819. # Reference values computed using R `metap` `sumz`:
  6820. # options(digits=16)
  6821. # library(metap)
  6822. # x = c(0.01, 0.2, 0.3)
  6823. # sumz(x, weights=c(1., 1., 1.))
  6824. # sumz(x, weights=c(1., 4., 9.))
  6825. "weights, expected_statistic, expected_pvalue",
  6826. [([1., 1., 1.], 2.131790594240385, 0.01651203260896294),
  6827. ([1., 4., 9.], 1.051815015753598, 0.1464422142261314)])
  6828. def test_weighted_stouffer(self, xp, weights, expected_statistic, expected_pvalue):
  6829. x = xp.asarray([.01, .2, .3])
  6830. res = stats.combine_pvalues(x, method='stouffer', weights=xp.asarray(weights))
  6831. xp_assert_close(res.statistic, xp.asarray(expected_statistic))
  6832. xp_assert_close(res.pvalue, xp.asarray(expected_pvalue))
  6833. methods = ["fisher", "pearson", "tippett", "stouffer", "mudholkar_george"]
  6834. @pytest.mark.parametrize("variant", ["single", "all", "random"])
  6835. @pytest.mark.parametrize("method", methods)
  6836. def test_monotonicity(self, variant, method, xp):
  6837. # Test that result increases monotonically with respect to input.
  6838. m, n = 10, 7
  6839. rng = np.random.default_rng(278448169958891062669391462690811630763)
  6840. # `pvaluess` is an m × n array of p values. Each row corresponds to
  6841. # a set of p values to be combined with p values increasing
  6842. # monotonically down one column (single), simultaneously down each
  6843. # column (all), or independently down each column (random).
  6844. if variant == "single":
  6845. pvaluess = xp.broadcast_to(xp.asarray(rng.random(n)), (m, n))
  6846. pvaluess = xp.concat([xp.reshape(xp.linspace(0.1, 0.9, m), (-1, 1)),
  6847. pvaluess[:, 1:]], axis=1)
  6848. elif variant == "all":
  6849. pvaluess = xp.broadcast_to(xp.linspace(0.1, 0.9, m), (n, m)).T
  6850. elif variant == "random":
  6851. pvaluess = xp.sort(xp.asarray(rng.uniform(0, 1, size=(m, n))), axis=0)
  6852. combined_pvalues = xp.asarray([
  6853. float(stats.combine_pvalues(pvaluess[i, :], method=method)[1])
  6854. for i in range(pvaluess.shape[0])
  6855. ])
  6856. assert xp.all(combined_pvalues[1:] - combined_pvalues[:-1] >= 0)
  6857. @pytest.mark.parametrize("method", methods)
  6858. def test_result(self, method, xp):
  6859. res = stats.combine_pvalues(xp.asarray([.01, .2, .3]), method=method)
  6860. xp_assert_equal(res.statistic, res[0])
  6861. xp_assert_equal(res.pvalue, res[1])
  6862. @pytest.mark.parametrize("method", methods)
  6863. @pytest.mark.parametrize("axis", [0, 1, None])
  6864. def test_axis(self, method, axis, xp):
  6865. rng = np.random.default_rng(234892349810482)
  6866. x = xp.asarray(rng.random(size=(2, 10)))
  6867. x = x.T if (axis == 0) else x
  6868. res = stats.combine_pvalues(x, axis=axis, method=method)
  6869. if axis is None:
  6870. x = xp.reshape(x, (-1,))
  6871. ref = stats.combine_pvalues(x, method=method)
  6872. xp_assert_close(res.statistic, ref.statistic)
  6873. xp_assert_close(res.pvalue, ref.pvalue)
  6874. return
  6875. x = x.T if (axis == 0) else x
  6876. x0, x1 = x[0, :], x[1, :]
  6877. ref0 = stats.combine_pvalues(x0, method=method)
  6878. ref1 = stats.combine_pvalues(x1, method=method)
  6879. xp_assert_close(res.statistic[0], ref0.statistic)
  6880. xp_assert_close(res.statistic[1], ref1.statistic)
  6881. xp_assert_close(res.pvalue[0], ref0.pvalue)
  6882. xp_assert_close(res.pvalue[1], ref1.pvalue)
  6883. class TestCdfDistanceValidation:
  6884. """
  6885. Test that _cdf_distance() (via wasserstein_distance()) raises ValueErrors
  6886. for bad inputs.
  6887. """
  6888. def test_distinct_value_and_weight_lengths(self):
  6889. # When the number of weights does not match the number of values,
  6890. # a ValueError should be raised.
  6891. assert_raises(ValueError, stats.wasserstein_distance,
  6892. [1], [2], [4], [3, 1])
  6893. assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [1, 0])
  6894. def test_zero_weight(self):
  6895. # When a distribution is given zero weight, a ValueError should be
  6896. # raised.
  6897. assert_raises(ValueError, stats.wasserstein_distance,
  6898. [0, 1], [2], [0, 0])
  6899. assert_raises(ValueError, stats.wasserstein_distance,
  6900. [0, 1], [2], [3, 1], [0])
  6901. def test_negative_weights(self):
  6902. # A ValueError should be raised if there are any negative weights.
  6903. assert_raises(ValueError, stats.wasserstein_distance,
  6904. [0, 1], [2, 2], [1, 1], [3, -1])
  6905. def test_empty_distribution(self):
  6906. # A ValueError should be raised when trying to measure the distance
  6907. # between something and nothing.
  6908. assert_raises(ValueError, stats.wasserstein_distance, [], [2, 2])
  6909. assert_raises(ValueError, stats.wasserstein_distance, [1], [])
  6910. def test_inf_weight(self):
  6911. # An inf weight is not valid.
  6912. assert_raises(ValueError, stats.wasserstein_distance,
  6913. [1, 2, 1], [1, 1], [1, np.inf, 1], [1, 1])
  6914. class TestWassersteinDistanceND:
  6915. """ Tests for wasserstein_distance_nd() output values.
  6916. """
  6917. def test_published_values(self):
  6918. # Compare against published values and manually computed results.
  6919. # The values and computed result are posted at James D. McCaffrey's blog,
  6920. # https://jamesmccaffrey.wordpress.com/2018/03/05/earth-mover-distance
  6921. # -wasserstein-metric-example-calculation/
  6922. u = [(1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1),
  6923. (4,2), (6,1), (6,1)]
  6924. v = [(2,1), (2,1), (3,2), (3,2), (3,2), (5,1), (5,1), (5,1), (5,1), (5,1),
  6925. (5,1), (5,1), (7,1)]
  6926. res = stats.wasserstein_distance_nd(u, v)
  6927. # In original post, the author kept two decimal places for ease of calculation.
  6928. # This test uses the more precise value of distance to get the precise results.
  6929. # For comparison, please see the table and figure in the original blog post.
  6930. flow = np.array([2., 3., 5., 1., 1., 1.])
  6931. dist = np.array([1.00, 5**0.5, 4.00, 2**0.5, 1.00, 1.00])
  6932. ref = np.sum(flow * dist)/np.sum(flow)
  6933. assert_allclose(res, ref)
  6934. @pytest.mark.parametrize('n_value', (4, 15, 35))
  6935. @pytest.mark.parametrize('ndim', (3, 4, 7))
  6936. @pytest.mark.parametrize('max_repeats', (5, 10))
  6937. def test_same_distribution_nD(self, ndim, n_value, max_repeats):
  6938. # Any distribution moved to itself should have a Wasserstein distance
  6939. # of zero.
  6940. rng = np.random.default_rng(363836384995579937222333)
  6941. repeats = rng.integers(1, max_repeats, size=n_value, dtype=int)
  6942. u_values = rng.random(size=(n_value, ndim))
  6943. v_values = np.repeat(u_values, repeats, axis=0)
  6944. v_weights = rng.random(np.sum(repeats))
  6945. range_repeat = np.repeat(np.arange(len(repeats)), repeats)
  6946. u_weights = np.bincount(range_repeat, weights=v_weights)
  6947. index = rng.permutation(len(v_weights))
  6948. v_values, v_weights = v_values[index], v_weights[index]
  6949. res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  6950. assert_allclose(res, 0, atol=1e-15)
  6951. @pytest.mark.parametrize('nu', (8, 9, 38))
  6952. @pytest.mark.parametrize('nv', (8, 12, 17))
  6953. @pytest.mark.parametrize('ndim', (3, 5, 23))
  6954. def test_collapse_nD(self, nu, nv, ndim):
  6955. # test collapse for n dimensional values
  6956. # Collapsing a n-D distribution to a point distribution at zero
  6957. # is equivalent to taking the average of the norm of data.
  6958. rng = np.random.default_rng(38573488467338826109)
  6959. u_values = rng.random(size=(nu, ndim))
  6960. v_values = np.zeros((nv, ndim))
  6961. u_weights = rng.random(size=nu)
  6962. v_weights = rng.random(size=nv)
  6963. ref = np.average(np.linalg.norm(u_values, axis=1), weights=u_weights)
  6964. res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  6965. assert_allclose(res, ref)
  6966. @pytest.mark.parametrize('nu', (8, 16, 32))
  6967. @pytest.mark.parametrize('nv', (8, 16, 32))
  6968. @pytest.mark.parametrize('ndim', (1, 2, 6))
  6969. def test_zero_weight_nD(self, nu, nv, ndim):
  6970. # Values with zero weight have no impact on the Wasserstein distance.
  6971. rng = np.random.default_rng(38573488467338826109)
  6972. u_values = rng.random(size=(nu, ndim))
  6973. v_values = rng.random(size=(nv, ndim))
  6974. u_weights = rng.random(size=nu)
  6975. v_weights = rng.random(size=nv)
  6976. ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  6977. add_row, nrows = rng.integers(0, nu, size=2)
  6978. add_value = rng.random(size=(nrows, ndim))
  6979. u_values = np.insert(u_values, add_row, add_value, axis=0)
  6980. u_weights = np.insert(u_weights, add_row, np.zeros(nrows), axis=0)
  6981. res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  6982. assert_allclose(res, ref)
  6983. def test_inf_values(self):
  6984. # Inf values can lead to an inf distance or trigger a RuntimeWarning
  6985. # (and return NaN) if the distance is undefined.
  6986. uv, vv, uw = [[1, 1], [2, 1]], [[np.inf, -np.inf]], [1, 1]
  6987. distance = stats.wasserstein_distance_nd(uv, vv, uw)
  6988. assert_equal(distance, np.inf)
  6989. with np.errstate(invalid='ignore'):
  6990. uv, vv = [[np.inf, np.inf]], [[np.inf, -np.inf]]
  6991. distance = stats.wasserstein_distance_nd(uv, vv)
  6992. assert_equal(distance, np.nan)
  6993. @pytest.mark.parametrize('nu', (10, 15, 20))
  6994. @pytest.mark.parametrize('nv', (10, 15, 20))
  6995. @pytest.mark.parametrize('ndim', (1, 3, 5))
  6996. def test_multi_dim_nD(self, nu, nv, ndim):
  6997. # Adding dimension on distributions do not affect the result
  6998. rng = np.random.default_rng(2736495738494849509)
  6999. u_values = rng.random(size=(nu, ndim))
  7000. v_values = rng.random(size=(nv, ndim))
  7001. u_weights = rng.random(size=nu)
  7002. v_weights = rng.random(size=nv)
  7003. ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  7004. add_dim = rng.integers(0, ndim)
  7005. add_value = rng.random()
  7006. u_values = np.insert(u_values, add_dim, add_value, axis=1)
  7007. v_values = np.insert(v_values, add_dim, add_value, axis=1)
  7008. res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  7009. assert_allclose(res, ref)
  7010. @pytest.mark.parametrize('nu', (7, 13, 19))
  7011. @pytest.mark.parametrize('nv', (7, 13, 19))
  7012. @pytest.mark.parametrize('ndim', (2, 4, 7))
  7013. def test_orthogonal_nD(self, nu, nv, ndim):
  7014. # orthogonal transformations do not affect the result of the
  7015. # wasserstein_distance
  7016. rng = np.random.default_rng(34746837464536)
  7017. u_values = rng.random(size=(nu, ndim))
  7018. v_values = rng.random(size=(nv, ndim))
  7019. u_weights = rng.random(size=nu)
  7020. v_weights = rng.random(size=nv)
  7021. ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights)
  7022. dist = stats.ortho_group(ndim)
  7023. transform = dist.rvs(random_state=rng)
  7024. shift = rng.random(size=ndim)
  7025. res = stats.wasserstein_distance_nd(u_values @ transform + shift,
  7026. v_values @ transform + shift,
  7027. u_weights, v_weights)
  7028. assert_allclose(res, ref)
  7029. def test_error_code(self):
  7030. rng = np.random.default_rng(52473644737485644836320101)
  7031. with pytest.raises(ValueError, match='Invalid input values. The inputs'):
  7032. u_values = rng.random(size=(4, 10, 15))
  7033. v_values = rng.random(size=(6, 2, 7))
  7034. _ = stats.wasserstein_distance_nd(u_values, v_values)
  7035. with pytest.raises(ValueError, match='Invalid input values. Dimensions'):
  7036. u_values = rng.random(size=(15,))
  7037. v_values = rng.random(size=(3, 15))
  7038. _ = stats.wasserstein_distance_nd(u_values, v_values)
  7039. with pytest.raises(ValueError,
  7040. match='Invalid input values. If two-dimensional'):
  7041. u_values = rng.random(size=(2, 10))
  7042. v_values = rng.random(size=(2, 2))
  7043. _ = stats.wasserstein_distance_nd(u_values, v_values)
  7044. @pytest.mark.parametrize('u_size', [1, 10, 50])
  7045. @pytest.mark.parametrize('v_size', [1, 10, 50])
  7046. def test_optimization_vs_analytical(self, u_size, v_size):
  7047. rng = np.random.default_rng(45634745675)
  7048. # Test when u_weights = None, v_weights = None
  7049. u_values = rng.random(size=(u_size, 1))
  7050. v_values = rng.random(size=(v_size, 1))
  7051. u_values_flat = u_values.ravel()
  7052. v_values_flat = v_values.ravel()
  7053. # These three calculations are done using different backends
  7054. # but they must be equal
  7055. d1 = stats.wasserstein_distance(u_values_flat, v_values_flat)
  7056. d2 = stats.wasserstein_distance_nd(u_values, v_values)
  7057. d3 = stats.wasserstein_distance_nd(u_values_flat, v_values_flat)
  7058. assert_allclose(d2, d1)
  7059. assert_allclose(d3, d1)
  7060. # Test with u_weights and v_weights specified.
  7061. u_weights = rng.random(size=u_size)
  7062. v_weights = rng.random(size=v_size)
  7063. d1 = stats.wasserstein_distance(u_values_flat, v_values_flat,
  7064. u_weights, v_weights)
  7065. d2 = stats.wasserstein_distance_nd(u_values, v_values,
  7066. u_weights, v_weights)
  7067. d3 = stats.wasserstein_distance_nd(u_values_flat, v_values_flat,
  7068. u_weights, v_weights)
  7069. assert_allclose(d2, d1)
  7070. assert_allclose(d3, d1)
  7071. class TestWassersteinDistance:
  7072. """ Tests for wasserstein_distance() output values.
  7073. """
  7074. def test_simple(self):
  7075. # For basic distributions, the value of the Wasserstein distance is
  7076. # straightforward.
  7077. assert_allclose(
  7078. stats.wasserstein_distance([0, 1], [0], [1, 1], [1]),
  7079. .5)
  7080. assert_allclose(stats.wasserstein_distance(
  7081. [0, 1], [0], [3, 1], [1]),
  7082. .25)
  7083. assert_allclose(stats.wasserstein_distance(
  7084. [0, 2], [0], [1, 1], [1]),
  7085. 1)
  7086. assert_allclose(stats.wasserstein_distance(
  7087. [0, 1, 2], [1, 2, 3]),
  7088. 1)
  7089. def test_same_distribution(self):
  7090. # Any distribution moved to itself should have a Wasserstein distance
  7091. # of zero.
  7092. assert_equal(stats.wasserstein_distance([1, 2, 3], [2, 1, 3]), 0)
  7093. assert_equal(
  7094. stats.wasserstein_distance([1, 1, 1, 4], [4, 1],
  7095. [1, 1, 1, 1], [1, 3]),
  7096. 0)
  7097. def test_shift(self):
  7098. # If the whole distribution is shifted by x, then the Wasserstein
  7099. # distance should be the norm of x.
  7100. assert_allclose(stats.wasserstein_distance([0], [1]), 1)
  7101. assert_allclose(stats.wasserstein_distance([-5], [5]), 10)
  7102. assert_allclose(
  7103. stats.wasserstein_distance([1, 2, 3, 4, 5], [11, 12, 13, 14, 15]),
  7104. 10)
  7105. assert_allclose(
  7106. stats.wasserstein_distance([4.5, 6.7, 2.1], [4.6, 7, 9.2],
  7107. [3, 1, 1], [1, 3, 1]),
  7108. 2.5)
  7109. def test_combine_weights(self):
  7110. # Assigning a weight w to a value is equivalent to including that value
  7111. # w times in the value array with weight of 1.
  7112. assert_allclose(
  7113. stats.wasserstein_distance(
  7114. [0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4],
  7115. [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]),
  7116. stats.wasserstein_distance([5, 0, 1], [0, 4, 3],
  7117. [1, 2, 4], [1, 2, 4]))
  7118. def test_collapse(self):
  7119. # Collapsing a distribution to a point distribution at zero is
  7120. # equivalent to taking the average of the absolute values of the
  7121. # values.
  7122. u = np.arange(-10, 30, 0.3)
  7123. v = np.zeros_like(u)
  7124. assert_allclose(
  7125. stats.wasserstein_distance(u, v),
  7126. np.mean(np.abs(u)))
  7127. u_weights = np.arange(len(u))
  7128. v_weights = u_weights[::-1]
  7129. assert_allclose(
  7130. stats.wasserstein_distance(u, v, u_weights, v_weights),
  7131. np.average(np.abs(u), weights=u_weights))
  7132. def test_zero_weight(self):
  7133. # Values with zero weight have no impact on the Wasserstein distance.
  7134. assert_allclose(
  7135. stats.wasserstein_distance([1, 2, 100000], [1, 1],
  7136. [1, 1, 0], [1, 1]),
  7137. stats.wasserstein_distance([1, 2], [1, 1], [1, 1], [1, 1]))
  7138. def test_inf_values(self):
  7139. # Inf values can lead to an inf distance or trigger a RuntimeWarning
  7140. # (and return NaN) if the distance is undefined.
  7141. assert_equal(
  7142. stats.wasserstein_distance([1, 2, np.inf], [1, 1]),
  7143. np.inf)
  7144. assert_equal(
  7145. stats.wasserstein_distance([1, 2, np.inf], [-np.inf, 1]),
  7146. np.inf)
  7147. assert_equal(
  7148. stats.wasserstein_distance([1, -np.inf, np.inf], [1, 1]),
  7149. np.inf)
  7150. with pytest.warns(RuntimeWarning, match="invalid value"):
  7151. assert_equal(
  7152. stats.wasserstein_distance([1, 2, np.inf], [np.inf, 1]),
  7153. np.nan)
  7154. class TestEnergyDistance:
  7155. """ Tests for energy_distance() output values.
  7156. """
  7157. def test_simple(self):
  7158. # For basic distributions, the value of the energy distance is
  7159. # straightforward.
  7160. assert_almost_equal(
  7161. stats.energy_distance([0, 1], [0], [1, 1], [1]),
  7162. np.sqrt(2) * .5)
  7163. assert_almost_equal(stats.energy_distance(
  7164. [0, 1], [0], [3, 1], [1]),
  7165. np.sqrt(2) * .25)
  7166. assert_almost_equal(stats.energy_distance(
  7167. [0, 2], [0], [1, 1], [1]),
  7168. 2 * .5)
  7169. assert_almost_equal(
  7170. stats.energy_distance([0, 1, 2], [1, 2, 3]),
  7171. np.sqrt(2) * (3*(1./3**2))**.5)
  7172. def test_same_distribution(self):
  7173. # Any distribution moved to itself should have a energy distance of
  7174. # zero.
  7175. assert_equal(stats.energy_distance([1, 2, 3], [2, 1, 3]), 0)
  7176. assert_equal(
  7177. stats.energy_distance([1, 1, 1, 4], [4, 1], [1, 1, 1, 1], [1, 3]),
  7178. 0)
  7179. def test_shift(self):
  7180. # If a single-point distribution is shifted by x, then the energy
  7181. # distance should be sqrt(2) * sqrt(x).
  7182. assert_almost_equal(stats.energy_distance([0], [1]), np.sqrt(2))
  7183. assert_almost_equal(
  7184. stats.energy_distance([-5], [5]),
  7185. np.sqrt(2) * 10**.5)
  7186. def test_combine_weights(self):
  7187. # Assigning a weight w to a value is equivalent to including that value
  7188. # w times in the value array with weight of 1.
  7189. assert_almost_equal(
  7190. stats.energy_distance([0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4],
  7191. [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]),
  7192. stats.energy_distance([5, 0, 1], [0, 4, 3], [1, 2, 4], [1, 2, 4]))
  7193. def test_zero_weight(self):
  7194. # Values with zero weight have no impact on the energy distance.
  7195. assert_almost_equal(
  7196. stats.energy_distance([1, 2, 100000], [1, 1], [1, 1, 0], [1, 1]),
  7197. stats.energy_distance([1, 2], [1, 1], [1, 1], [1, 1]))
  7198. def test_inf_values(self):
  7199. # Inf values can lead to an inf distance or trigger a RuntimeWarning
  7200. # (and return NaN) if the distance is undefined.
  7201. assert_equal(stats.energy_distance([1, 2, np.inf], [1, 1]), np.inf)
  7202. assert_equal(
  7203. stats.energy_distance([1, 2, np.inf], [-np.inf, 1]),
  7204. np.inf)
  7205. assert_equal(
  7206. stats.energy_distance([1, -np.inf, np.inf], [1, 1]),
  7207. np.inf)
  7208. with pytest.warns(RuntimeWarning, match="invalid value"):
  7209. assert_equal(
  7210. stats.energy_distance([1, 2, np.inf], [np.inf, 1]),
  7211. np.nan)
  7212. @make_xp_test_case(stats.brunnermunzel)
  7213. class TestBrunnerMunzel:
  7214. # Data from (Lumley, 1996)
  7215. X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
  7216. Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
  7217. def test_brunnermunzel_one_sided(self, xp):
  7218. # Results are compared with R's lawstat package.
  7219. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  7220. u1, p1 = stats.brunnermunzel(X, Y, alternative='less')
  7221. u2, p2 = stats.brunnermunzel(Y, X, alternative='greater')
  7222. u3, p3 = stats.brunnermunzel(X, Y, alternative='greater')
  7223. u4, p4 = stats.brunnermunzel(Y, X, alternative='less')
  7224. xp_assert_close(p1, p2)
  7225. xp_assert_close(p3, p4)
  7226. assert p1 != p3
  7227. xp_assert_close(u1, xp.asarray(3.1374674823029505))
  7228. xp_assert_close(u2, xp.asarray(-3.1374674823029505))
  7229. xp_assert_close(u3, xp.asarray(3.1374674823029505))
  7230. xp_assert_close(u4, xp.asarray(-3.1374674823029505))
  7231. xp_assert_close(p1, xp.asarray(0.0028931043330757342))
  7232. xp_assert_close(p3, xp.asarray(0.99710689566692423))
  7233. def test_brunnermunzel_two_sided(self, xp):
  7234. # Results are compared with R's lawstat package.
  7235. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  7236. u1, p1 = stats.brunnermunzel(X, Y, alternative='two-sided')
  7237. u2, p2 = stats.brunnermunzel(Y, X, alternative='two-sided')
  7238. xp_assert_close(p1, xp.asarray(p2))
  7239. xp_assert_close(u1, xp.asarray(3.1374674823029505))
  7240. xp_assert_close(u2, xp.asarray(-3.1374674823029505))
  7241. xp_assert_close(p1, xp.asarray(0.0057862086661515377))
  7242. def test_brunnermunzel_default(self, xp):
  7243. # The default value for alternative is two-sided
  7244. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  7245. u1, p1 = stats.brunnermunzel(X, Y)
  7246. u2, p2 = stats.brunnermunzel(Y, X)
  7247. xp_assert_close(p1, p2)
  7248. xp_assert_close(u1, xp.asarray(3.1374674823029505))
  7249. xp_assert_close(u2, xp.asarray(-3.1374674823029505))
  7250. xp_assert_close(p1, xp.asarray(0.0057862086661515377))
  7251. def test_brunnermunzel_alternative_error(self, xp):
  7252. alternative = "error"
  7253. distribution = "t"
  7254. nan_policy = "propagate"
  7255. assert alternative not in ["two-sided", "greater", "less"]
  7256. message = "`alternative` must be 'less', 'greater', or 'two-sided'."
  7257. with pytest.raises(ValueError, match=message):
  7258. stats.brunnermunzel(xp.asarray(self.X), xp.asarray(self.Y),
  7259. alternative, distribution, nan_policy)
  7260. def test_brunnermunzel_distribution_norm(self, xp):
  7261. X, Y = xp.asarray(self.X), xp.asarray(self.Y)
  7262. u1, p1 = stats.brunnermunzel(X, Y, distribution="normal")
  7263. u2, p2 = stats.brunnermunzel(Y, X, distribution="normal")
  7264. xp_assert_close(p1, xp.asarray(p2))
  7265. xp_assert_close(u1, xp.asarray(3.1374674823029505))
  7266. xp_assert_close(u2, xp.asarray(-3.1374674823029505))
  7267. xp_assert_close(p1, xp.asarray(0.0017041417600383024))
  7268. def test_brunnermunzel_distribution_error(self, xp):
  7269. alternative = "two-sided"
  7270. distribution = "error"
  7271. nan_policy = "propagate"
  7272. assert distribution not in ["t", "normal"]
  7273. message = "distribution should be 't' or 'normal'"
  7274. with pytest.raises(ValueError, match=message):
  7275. stats.brunnermunzel(xp.asarray(self.X), xp.asarray(self.Y),
  7276. alternative, distribution, nan_policy)
  7277. @pytest.mark.parametrize("kwarg_update", [{'y': []}, {'x': []},
  7278. {'x': [], 'y': []}])
  7279. def test_brunnermunzel_empty_imput(self, kwarg_update, xp):
  7280. kwargs = {'x': self.X, 'y': self.Y}
  7281. kwargs.update(kwarg_update)
  7282. kwargs = {key:xp.asarray(val, dtype=xp_default_dtype(xp))
  7283. for key, val in kwargs.items()}
  7284. with eager_warns(SmallSampleWarning, match=too_small_1d_not_omit, xp=xp):
  7285. statistic, pvalue = stats.brunnermunzel(**kwargs)
  7286. xp_assert_equal(statistic, xp.asarray(xp.nan))
  7287. xp_assert_equal(pvalue, xp.asarray(xp.nan))
  7288. def test_brunnermunzel_nan_input_propagate(self, xp):
  7289. X = xp.asarray([1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, xp.nan])
  7290. Y = xp.asarray([3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4.])
  7291. u1, p1 = stats.brunnermunzel(X, Y, nan_policy="propagate")
  7292. u2, p2 = stats.brunnermunzel(Y, X, nan_policy="propagate")
  7293. xp_assert_equal(u1, xp.asarray(xp.nan))
  7294. xp_assert_equal(p1, xp.asarray(xp.nan))
  7295. xp_assert_equal(u2, xp.asarray(xp.nan))
  7296. xp_assert_equal(p2, xp.asarray(xp.nan))
  7297. def test_brunnermunzel_nan_input_raise(self, xp):
  7298. X = xp.asarray([1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, xp.nan])
  7299. Y = xp.asarray([3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4.])
  7300. alternative = "two-sided"
  7301. distribution = "t"
  7302. nan_policy = "raise"
  7303. message = "The input contains nan values"
  7304. with pytest.raises(ValueError, match=message):
  7305. stats.brunnermunzel(X, Y, alternative, distribution, nan_policy)
  7306. with pytest.raises(ValueError, match=message):
  7307. stats.brunnermunzel(Y, X, alternative, distribution, nan_policy)
  7308. def test_brunnermunzel_nan_input_omit(self, xp):
  7309. X = xp.asarray([1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan])
  7310. Y = xp.asarray([3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4.])
  7311. u1, p1 = stats.brunnermunzel(X, Y, nan_policy="omit")
  7312. u2, p2 = stats.brunnermunzel(Y, X, nan_policy="omit")
  7313. xp_assert_close(p1, p2)
  7314. xp_assert_close(u1, xp.asarray(3.1374674823029505))
  7315. xp_assert_close(u2, xp.asarray(-3.1374674823029505))
  7316. xp_assert_close(p1, xp.asarray(0.0057862086661515377))
  7317. @pytest.mark.parametrize("vectorized_call", [False, True])
  7318. def test_brunnermunzel_return_nan(self, vectorized_call, xp):
  7319. """ tests that a warning is emitted when p is nan
  7320. p-value with t-distributions can be nan (0/0) (see gh-15843)
  7321. """
  7322. x = xp.asarray([1., 2., 3.])
  7323. y = xp.asarray([5., 6., 7., 8., 9.])
  7324. if vectorized_call:
  7325. x = xp.stack((x, x)).T
  7326. y = xp.stack((y, y)).T
  7327. msg = "p-value cannot be estimated|divide by zero|invalid value encountered"
  7328. with eager_warns(RuntimeWarning, match=msg, xp=xp):
  7329. stats.brunnermunzel(x, y, distribution="t")
  7330. def test_brunnermunzel_normal_dist(self, xp):
  7331. """ tests that a p is 0 for datasets that cause p->nan
  7332. when t-distribution is used (see gh-15843)
  7333. """
  7334. x = xp.asarray([1., 2., 3.])
  7335. y = xp.asarray([5., 6., 7., 8., 9.])
  7336. with eager_warns(RuntimeWarning, match='divide by zero', xp=xp):
  7337. _, p = stats.brunnermunzel(x, y, distribution="normal")
  7338. xp_assert_equal(p, xp.asarray(0.))
  7339. class TestQuantileTest:
  7340. r""" Test the non-parametric quantile test,
  7341. including the computation of confidence intervals
  7342. """
  7343. def test_quantile_test_iv(self):
  7344. x = [1, 2, 3]
  7345. message = "`x` must be a one-dimensional array of numbers."
  7346. with pytest.raises(ValueError, match=message):
  7347. stats.quantile_test([x])
  7348. message = "`q` must be a scalar."
  7349. with pytest.raises(ValueError, match=message):
  7350. stats.quantile_test(x, q=[1, 2])
  7351. message = "`p` must be a float strictly between 0 and 1."
  7352. with pytest.raises(ValueError, match=message):
  7353. stats.quantile_test(x, p=[0.5, 0.75])
  7354. with pytest.raises(ValueError, match=message):
  7355. stats.quantile_test(x, p=2)
  7356. with pytest.raises(ValueError, match=message):
  7357. stats.quantile_test(x, p=-0.5)
  7358. message = "`alternative` must be one of..."
  7359. with pytest.raises(ValueError, match=message):
  7360. stats.quantile_test(x, alternative='one-sided')
  7361. message = "`confidence_level` must be a number between 0 and 1."
  7362. with pytest.raises(ValueError, match=message):
  7363. stats.quantile_test(x).confidence_interval(1)
  7364. @pytest.mark.parametrize(
  7365. 'p, alpha, lb, ub, alternative',
  7366. [[0.3, 0.95, 1.221402758160170, 1.476980793882643, 'two-sided'],
  7367. [0.5, 0.9, 1.506817785112854, 1.803988415397857, 'two-sided'],
  7368. [0.25, 0.95, -np.inf, 1.39096812846378, 'less'],
  7369. [0.8, 0.9, 2.117000016612675, np.inf, 'greater']]
  7370. )
  7371. def test_R_ci_quantile(self, p, alpha, lb, ub, alternative):
  7372. # Test against R library `confintr` function `ci_quantile`, e.g.
  7373. # library(confintr)
  7374. # options(digits=16)
  7375. # x <- exp(seq(0, 1, by = 0.01))
  7376. # ci_quantile(x, q = 0.3)$interval
  7377. # ci_quantile(x, q = 0.5, probs = c(0.05, 0.95))$interval
  7378. # ci_quantile(x, q = 0.25, probs = c(0, 0.95))$interval
  7379. # ci_quantile(x, q = 0.8, probs = c(0.1, 1))$interval
  7380. x = np.exp(np.arange(0, 1.01, 0.01))
  7381. res = stats.quantile_test(x, p=p, alternative=alternative)
  7382. assert_allclose(res.confidence_interval(alpha), [lb, ub], rtol=1e-15)
  7383. @pytest.mark.parametrize(
  7384. 'q, p, alternative, ref',
  7385. [[1.2, 0.3, 'two-sided', 0.01515567517648],
  7386. [1.8, 0.5, 'two-sided', 0.1109183496606]]
  7387. )
  7388. def test_R_pvalue(self, q, p, alternative, ref):
  7389. # Test against R library `snpar` function `quant.test`, e.g.
  7390. # library(snpar)
  7391. # options(digits=16)
  7392. # x < - exp(seq(0, 1, by=0.01))
  7393. # quant.test(x, q=1.2, p=0.3, exact=TRUE, alternative='t')
  7394. x = np.exp(np.arange(0, 1.01, 0.01))
  7395. res = stats.quantile_test(x, q=q, p=p, alternative=alternative)
  7396. assert_allclose(res.pvalue, ref, rtol=1e-12)
  7397. @pytest.mark.parametrize('case', ['continuous', 'discrete'])
  7398. @pytest.mark.parametrize('alternative', ['less', 'greater'])
  7399. @pytest.mark.parametrize('alpha', [0.9, 0.95])
  7400. def test_pval_ci_match(self, case, alternative, alpha):
  7401. # Verify that the following statement holds:
  7402. # The 95% confidence interval corresponding with alternative='less'
  7403. # has -inf as its lower bound, and the upper bound `xu` is the greatest
  7404. # element from the sample `x` such that:
  7405. # `stats.quantile_test(x, q=xu, p=p, alternative='less').pvalue``
  7406. # will be greater than 5%.
  7407. # And the corresponding statement for the alternative='greater' case.
  7408. seed = int((7**len(case) + len(alternative))*alpha)
  7409. rng = np.random.default_rng(seed)
  7410. if case == 'continuous':
  7411. p, q = rng.random(size=2)
  7412. rvs = rng.random(size=100)
  7413. else:
  7414. rvs = rng.integers(1, 11, size=100)
  7415. p = rng.random()
  7416. q = rng.integers(1, 11)
  7417. res = stats.quantile_test(rvs, q=q, p=p, alternative=alternative)
  7418. ci = res.confidence_interval(confidence_level=alpha)
  7419. # select elements inside the confidence interval based on alternative
  7420. if alternative == 'less':
  7421. i_inside = rvs <= ci.high
  7422. else:
  7423. i_inside = rvs >= ci.low
  7424. for x in rvs[i_inside]:
  7425. res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative)
  7426. assert res.pvalue > 1 - alpha
  7427. for x in rvs[~i_inside]:
  7428. res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative)
  7429. assert res.pvalue < 1 - alpha
  7430. def test_match_conover_examples(self):
  7431. # Test against the examples in [1] (Conover Practical Nonparametric
  7432. # Statistics Third Edition) pg 139
  7433. # Example 1
  7434. # Data is [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
  7435. # 174, 166, 248]
  7436. # Two-sided test of whether the upper quartile (p=0.75) equals 193
  7437. # (q=193). Conover shows that 7 of the observations are less than or
  7438. # equal to 193, and "for the binomial random variable Y, P(Y<=7) =
  7439. # 0.0173", so the two-sided p-value is twice that, 0.0346.
  7440. x = [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
  7441. 174, 166, 248]
  7442. pvalue_expected = 0.0346
  7443. res = stats.quantile_test(x, q=193, p=0.75, alternative='two-sided')
  7444. assert_allclose(res.pvalue, pvalue_expected, rtol=1e-5)
  7445. # Example 2
  7446. # Conover doesn't give explicit data, just that 8 out of 112
  7447. # observations are 60 or less. The test is whether the median time is
  7448. # equal to 60 against the alternative that the median is greater than
  7449. # 60. The p-value is calculated as P(Y<=8), where Y is again a binomial
  7450. # distributed random variable, now with p=0.5 and n=112. Conover uses a
  7451. # normal approximation, but we can easily calculate the CDF of the
  7452. # binomial distribution.
  7453. x = [59]*8 + [61]*(112-8)
  7454. pvalue_expected = stats.binom(p=0.5, n=112).pmf(k=8)
  7455. res = stats.quantile_test(x, q=60, p=0.5, alternative='greater')
  7456. assert_allclose(res.pvalue, pvalue_expected, atol=1e-10)
  7457. class TestPageTrendTest:
  7458. def setup_method(self):
  7459. self.rng = np.random.default_rng(1808365978)
  7460. # expected statistic and p-values generated using R at
  7461. # https://rdrr.io/cran/cultevo/, e.g.
  7462. # library(cultevo)
  7463. # data = rbind(c(72, 47, 73, 35, 47, 96, 30, 59, 41, 36, 56, 49, 81, 43,
  7464. # 70, 47, 28, 28, 62, 20, 61, 20, 80, 24, 50),
  7465. # c(68, 52, 60, 34, 44, 20, 65, 88, 21, 81, 48, 31, 31, 67,
  7466. # 69, 94, 30, 24, 40, 87, 70, 43, 50, 96, 43),
  7467. # c(81, 13, 85, 35, 79, 12, 92, 86, 21, 64, 16, 64, 68, 17,
  7468. # 16, 89, 71, 43, 43, 36, 54, 13, 66, 51, 55))
  7469. # result = page.test(data, verbose=FALSE)
  7470. # Most test cases generated to achieve common critical p-values so that
  7471. # results could be checked (to limited precision) against tables in
  7472. # scipy.stats.page_trend_test reference [1]
  7473. rng = np.random.default_rng(3113562111)
  7474. data_3_25 = rng.random((3, 25))
  7475. rng = np.random.default_rng(3113562111)
  7476. data_10_26 = rng.random((10, 26))
  7477. ts = [
  7478. (12949, 0.275539045444, False, 'asymptotic', data_3_25),
  7479. (47221, 0.5703651063709, False, 'asymptotic', data_10_26),
  7480. (12332, 0.7722477197436702, False, 'asymptotic',
  7481. [[72, 47, 73, 35, 47, 96, 30, 59, 41, 36, 56, 49, 81,
  7482. 43, 70, 47, 28, 28, 62, 20, 61, 20, 80, 24, 50],
  7483. [68, 52, 60, 34, 44, 20, 65, 88, 21, 81, 48, 31, 31,
  7484. 67, 69, 94, 30, 24, 40, 87, 70, 43, 50, 96, 43],
  7485. [81, 13, 85, 35, 79, 12, 92, 86, 21, 64, 16, 64, 68,
  7486. 17, 16, 89, 71, 43, 43, 36, 54, 13, 66, 51, 55]]),
  7487. (266, 4.121656378600823e-05, False, 'exact',
  7488. [[1.5, 4., 8.3, 5, 19, 11],
  7489. [5, 4, 3.5, 10, 20, 21],
  7490. [8.4, 3.2, 10, 12, 14, 15]]),
  7491. (332, 0.9566400920502488, True, 'exact',
  7492. [[4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1],
  7493. [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1],
  7494. [3, 4, 1, 2], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4],
  7495. [1, 2, 3, 4], [1, 2, 3, 4]]),
  7496. (241, 0.9622210164861476, True, 'exact',
  7497. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  7498. [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  7499. [3, 2, 1], [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3],
  7500. [1, 2, 3], [1, 2, 3], [1, 2, 3]]),
  7501. (197, 0.9619432897162209, True, 'exact',
  7502. [[6, 5, 4, 3, 2, 1], [6, 5, 4, 3, 2, 1], [1, 3, 4, 5, 2, 6]]),
  7503. (423, 0.9590458306880073, True, 'exact',
  7504. [[5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1],
  7505. [5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1],
  7506. [4, 1, 3, 2, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5],
  7507. [1, 2, 3, 4, 5]]),
  7508. (217, 0.9693058575034678, True, 'exact',
  7509. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  7510. [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  7511. [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3],
  7512. [1, 2, 3]]),
  7513. (395, 0.991530289351305, True, 'exact',
  7514. [[7, 6, 5, 4, 3, 2, 1], [7, 6, 5, 4, 3, 2, 1],
  7515. [6, 5, 7, 4, 3, 2, 1], [1, 2, 3, 4, 5, 6, 7]]),
  7516. (117, 0.9997817843373017, True, 'exact',
  7517. [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1],
  7518. [3, 2, 1], [3, 2, 1], [3, 2, 1], [2, 1, 3], [1, 2, 3]]),
  7519. ]
  7520. @pytest.mark.parametrize("L, p, ranked, method, data", ts)
  7521. def test_accuracy(self, L, p, ranked, method, data):
  7522. res = stats.page_trend_test(data, ranked=ranked, method=method)
  7523. assert_equal(L, res.statistic)
  7524. assert_allclose(p, res.pvalue)
  7525. assert_equal(method, res.method)
  7526. ts2 = [
  7527. (542, 0.9481266260876332, True, 'exact',
  7528. [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
  7529. [1, 8, 4, 7, 6, 5, 9, 3, 2, 10]]),
  7530. (1322, 0.9993113928199309, True, 'exact',
  7531. [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
  7532. [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 2, 8, 7, 6, 5, 4, 3, 10, 1],
  7533. [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]),
  7534. (2286, 0.9908688345484833, True, 'exact',
  7535. [[8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  7536. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  7537. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  7538. [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1],
  7539. [8, 7, 6, 5, 4, 3, 2, 1], [1, 3, 5, 6, 4, 7, 2, 8],
  7540. [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8],
  7541. [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8],
  7542. [1, 2, 3, 4, 5, 6, 7, 8]]),
  7543. ]
  7544. # only the first of these appears slow because intermediate data are
  7545. # cached and used on the rest
  7546. @pytest.mark.parametrize("L, p, ranked, method, data", ts2)
  7547. @pytest.mark.slow()
  7548. def test_accuracy2(self, L, p, ranked, method, data):
  7549. res = stats.page_trend_test(data, ranked=ranked, method=method)
  7550. assert_equal(L, res.statistic)
  7551. assert_allclose(p, res.pvalue)
  7552. assert_equal(method, res.method)
  7553. def test_options(self):
  7554. rng = np.random.default_rng(183973867)
  7555. m, n = 10, 20
  7556. predicted_ranks = np.arange(1, n+1)
  7557. perm = rng.permutation(np.arange(n))
  7558. data = rng.random((m, n))
  7559. ranks = stats.rankdata(data, axis=1)
  7560. res1 = stats.page_trend_test(ranks)
  7561. res2 = stats.page_trend_test(ranks, ranked=True)
  7562. res3 = stats.page_trend_test(data, ranked=False)
  7563. res4 = stats.page_trend_test(ranks, predicted_ranks=predicted_ranks)
  7564. res5 = stats.page_trend_test(ranks[:, perm],
  7565. predicted_ranks=predicted_ranks[perm])
  7566. assert_equal(res1.statistic, res2.statistic)
  7567. assert_equal(res1.statistic, res3.statistic)
  7568. assert_equal(res1.statistic, res4.statistic)
  7569. assert_equal(res1.statistic, res5.statistic)
  7570. def test_Ames_assay(self):
  7571. # test from _page_trend_test.py [2] page 151; data on page 144
  7572. data = [[101, 117, 111], [91, 90, 107], [103, 133, 121],
  7573. [136, 140, 144], [190, 161, 201], [146, 120, 116]]
  7574. data = np.array(data).T
  7575. predicted_ranks = np.arange(1, 7)
  7576. res = stats.page_trend_test(data, ranked=False,
  7577. predicted_ranks=predicted_ranks,
  7578. method="asymptotic")
  7579. assert_equal(res.statistic, 257)
  7580. assert_almost_equal(res.pvalue, 0.0035, decimal=4)
  7581. res = stats.page_trend_test(data, ranked=False,
  7582. predicted_ranks=predicted_ranks,
  7583. method="exact")
  7584. assert_equal(res.statistic, 257)
  7585. assert_almost_equal(res.pvalue, 0.0023, decimal=4)
  7586. def test_input_validation(self):
  7587. # test data not a 2d array
  7588. with assert_raises(ValueError, match="`data` must be a 2d array."):
  7589. stats.page_trend_test(None)
  7590. with assert_raises(ValueError, match="`data` must be a 2d array."):
  7591. stats.page_trend_test([])
  7592. with assert_raises(ValueError, match="`data` must be a 2d array."):
  7593. stats.page_trend_test([1, 2])
  7594. with assert_raises(ValueError, match="`data` must be a 2d array."):
  7595. stats.page_trend_test([[[1]]])
  7596. # test invalid dimensions
  7597. rng = np.random.default_rng(2482566048)
  7598. with assert_raises(ValueError, match="Page's L is only appropriate"):
  7599. stats.page_trend_test(rng.random((1, 3)))
  7600. with assert_raises(ValueError, match="Page's L is only appropriate"):
  7601. stats.page_trend_test(rng.random((2, 2)))
  7602. # predicted ranks must include each integer [1, 2, 3] exactly once
  7603. message = "`predicted_ranks` must include each integer"
  7604. with assert_raises(ValueError, match=message):
  7605. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7606. predicted_ranks=[0, 1, 2])
  7607. with assert_raises(ValueError, match=message):
  7608. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7609. predicted_ranks=[1.1, 2, 3])
  7610. with assert_raises(ValueError, match=message):
  7611. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7612. predicted_ranks=[1, 2, 3, 3])
  7613. with assert_raises(ValueError, match=message):
  7614. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7615. predicted_ranks="invalid")
  7616. # test improperly ranked data
  7617. with assert_raises(ValueError, match="`data` is not properly ranked"):
  7618. stats.page_trend_test([[0, 2, 3], [1, 2, 3]], True)
  7619. with assert_raises(ValueError, match="`data` is not properly ranked"):
  7620. stats.page_trend_test([[1, 2, 3], [1, 2, 4]], True)
  7621. # various
  7622. with assert_raises(ValueError, match="`data` contains NaNs"):
  7623. stats.page_trend_test([[1, 2, 3], [1, 2, np.nan]],
  7624. ranked=False)
  7625. with assert_raises(ValueError, match="`method` must be in"):
  7626. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7627. method="ekki")
  7628. with assert_raises(TypeError, match="`ranked` must be boolean."):
  7629. stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]],
  7630. ranked="ekki")
  7631. rng = np.random.default_rng(902340982)
  7632. x = rng.random(10)
  7633. y = rng.random(10)
  7634. @pytest.mark.parametrize("fun, args",
  7635. [(stats.wilcoxon, (x,)),
  7636. (stats.ks_1samp, (x, stats.norm.cdf)), # type: ignore[attr-defined] # noqa: E501
  7637. (stats.ks_2samp, (x, y)),
  7638. (stats.kstest, (x, y)),
  7639. ])
  7640. def test_rename_mode_method(fun, args):
  7641. res = fun(*args, method='exact')
  7642. res2 = fun(*args, mode='exact')
  7643. assert_equal(res, res2)
  7644. err = rf"{fun.__name__}() got multiple values for argument"
  7645. with pytest.raises(TypeError, match=re.escape(err)):
  7646. fun(*args, method='exact', mode='exact')
  7647. class TestExpectile:
  7648. def test_same_as_mean(self):
  7649. rng = np.random.default_rng(42)
  7650. x = rng.random(size=20)
  7651. assert_allclose(stats.expectile(x, alpha=0.5), np.mean(x))
  7652. def test_minimum(self):
  7653. rng = np.random.default_rng(42)
  7654. x = rng.random(size=20)
  7655. assert_allclose(stats.expectile(x, alpha=0), np.amin(x))
  7656. def test_maximum(self):
  7657. rng = np.random.default_rng(42)
  7658. x = rng.random(size=20)
  7659. assert_allclose(stats.expectile(x, alpha=1), np.amax(x))
  7660. def test_weights(self):
  7661. # expectile should minimize `fun` defined below; see
  7662. # F. Sobotka and T. Kneib, "Geoadditive expectile regression",
  7663. # Computational Statistics and Data Analysis 56 (2012) 755-767
  7664. # :doi:`10.1016/j.csda.2010.11.015`
  7665. rng = np.random.default_rng(1856392524598679138)
  7666. def fun(u, a, alpha, weights):
  7667. w = np.full_like(a, fill_value=alpha)
  7668. w[a <= u] = 1 - alpha
  7669. return np.sum(w * weights * (a - u)**2)
  7670. def expectile2(a, alpha, weights):
  7671. bracket = np.min(a), np.max(a)
  7672. return optimize.minimize_scalar(fun, bracket=bracket,
  7673. args=(a, alpha, weights)).x
  7674. n = 10
  7675. a = rng.random(n)
  7676. alpha = rng.random()
  7677. weights = rng.random(n)
  7678. res = stats.expectile(a, alpha, weights=weights)
  7679. ref = expectile2(a, alpha, weights)
  7680. assert_allclose(res, ref)
  7681. @pytest.mark.parametrize(
  7682. "alpha", [0.2, 0.5 - 1e-12, 0.5, 0.5 + 1e-12, 0.8]
  7683. )
  7684. @pytest.mark.parametrize("n", [20, 2000])
  7685. def test_expectile_properties(self, alpha, n):
  7686. """
  7687. See Section 6 of
  7688. I. Steinwart, C. Pasin, R.C. Williamson & S. Zhang (2014).
  7689. "Elicitation and Identification of Properties". COLT.
  7690. http://proceedings.mlr.press/v35/steinwart14.html
  7691. and
  7692. Propositions 5, 6, 7 of
  7693. F. Bellini, B. Klar, and A. Müller and E. Rosazza Gianin (2013).
  7694. "Generalized Quantiles as Risk Measures"
  7695. http://doi.org/10.2139/ssrn.2225751
  7696. """
  7697. rng = np.random.default_rng(42)
  7698. x = rng.normal(size=n)
  7699. # 0. definite / constancy
  7700. # Let T(X) denote the expectile of rv X ~ F.
  7701. # T(c) = c for constant c
  7702. for c in [-5, 0, 0.5]:
  7703. assert_allclose(
  7704. stats.expectile(np.full(shape=n, fill_value=c), alpha=alpha),
  7705. c
  7706. )
  7707. # 1. translation equivariance
  7708. # T(X + c) = T(X) + c
  7709. c = rng.exponential()
  7710. assert_allclose(
  7711. stats.expectile(x + c, alpha=alpha),
  7712. stats.expectile(x, alpha=alpha) + c,
  7713. )
  7714. assert_allclose(
  7715. stats.expectile(x - c, alpha=alpha),
  7716. stats.expectile(x, alpha=alpha) - c,
  7717. )
  7718. # 2. positively homogeneity
  7719. # T(cX) = c * T(X) for c > 0
  7720. assert_allclose(
  7721. stats.expectile(c * x, alpha=alpha),
  7722. c * stats.expectile(x, alpha=alpha),
  7723. )
  7724. # 3. subadditivity
  7725. # Note that subadditivity holds for alpha >= 0.5.
  7726. # T(X + Y) <= T(X) + T(Y)
  7727. # For alpha = 0.5, i.e. the mean, strict equality holds.
  7728. # For alpha < 0.5, one can use property 6. to show
  7729. # T(X + Y) >= T(X) + T(Y)
  7730. y = rng.logistic(size=n, loc=10) # different distribution than x
  7731. if alpha == 0.5:
  7732. def assert_op(a, b):
  7733. assert_allclose(a, b)
  7734. elif alpha > 0.5:
  7735. def assert_op(a, b):
  7736. assert a < b
  7737. else:
  7738. def assert_op(a, b):
  7739. assert a > b
  7740. assert_op(
  7741. stats.expectile(np.r_[x + y], alpha=alpha),
  7742. stats.expectile(x, alpha=alpha)
  7743. + stats.expectile(y, alpha=alpha)
  7744. )
  7745. # 4. monotonicity
  7746. # This holds for first order stochastic dominance X:
  7747. # X >= Y whenever P(X <= x) < P(Y <= x)
  7748. # T(X) <= T(Y) whenever X <= Y
  7749. y = rng.normal(size=n, loc=5)
  7750. assert (
  7751. stats.expectile(x, alpha=alpha) <= stats.expectile(y, alpha=alpha)
  7752. )
  7753. # 5. convexity for alpha > 0.5, concavity for alpha < 0.5
  7754. # convexity is
  7755. # T((1 - c) X + c Y) <= (1 - c) T(X) + c T(Y) for 0 <= c <= 1
  7756. y = rng.logistic(size=n, loc=10)
  7757. for c in [0.1, 0.5, 0.8]:
  7758. assert_op(
  7759. stats.expectile((1-c)*x + c*y, alpha=alpha),
  7760. (1-c) * stats.expectile(x, alpha=alpha) +
  7761. c * stats.expectile(y, alpha=alpha)
  7762. )
  7763. # 6. negative argument
  7764. # T_{alpha}(-X) = -T_{1-alpha}(X)
  7765. assert_allclose(
  7766. stats.expectile(-x, alpha=alpha),
  7767. -stats.expectile(x, alpha=1-alpha),
  7768. )
  7769. @pytest.mark.parametrize("n", [20, 2000])
  7770. def test_monotonicity_in_alpha(self, n):
  7771. rng = np.random.default_rng(42)
  7772. x = rng.pareto(a=2, size=n)
  7773. e_list = []
  7774. alpha_seq = np.logspace(-15, np.log10(0.5), 100)
  7775. # sorted list of unique alpha values in interval (0, 1)
  7776. for alpha in np.r_[0, alpha_seq, 1 - alpha_seq[:-1:-1], 1]:
  7777. e_list.append(stats.expectile(x, alpha=alpha))
  7778. assert np.all(np.diff(e_list) > 0)
  7779. @make_xp_test_case(stats.lmoment)
  7780. class TestLMoment:
  7781. # data from https://github.com/scipy/scipy/issues/19460
  7782. data = [0.87, 0.87, 1.29, 1.5, 1.7, 0.66, 1.5, 0.5, 1., 1.25, 2.3,
  7783. 1.03, 2.85, 0.68, 1.74, 1.94, 0.63, 2.04, 1.2, 0.64, 2.05, 0.97,
  7784. 2.81, 1.02, 2.76, 0.86, 1.36, 1.29, 1.68, 0.72, 1.67, 1.15, 3.26,
  7785. 0.93, 0.83, 0.91, 0.92, 2.32, 1.12, 3.21, 1.23, 1.22, 1.29, 2.08,
  7786. 0.64, 2.83, 2.68, 1.77, 0.69, 1.69, 0.7, 1.83, 2.25, 1.23, 1.17,
  7787. 0.94, 1.22, 0.76, 0.69, 0.48, 1.04, 2.49, 1.38, 1.57, 1.79, 1.59,
  7788. 1.3, 1.54, 1.07, 1.03, 0.76, 2.35, 2.05, 2.02, 2.36, 1.59, 0.97,
  7789. 1.63, 1.66, 0.94, 1.45, 1.26, 1.25, 0.68, 2.96, 0.8, 1.16, 0.82,
  7790. 0.64, 0.87, 1.33, 1.28, 1.26, 1.19, 1.24, 1.12, 1.45, 1.03, 1.37,
  7791. 1.4, 1.35, 1.28, 1.04, 1.31, 0.87, 0.96, 2.55, 1.72, 1.05, 1.15,
  7792. 1.73, 1.03, 1.53, 2.41, 1.36, 2.08, 0.92, 0.73, 1.56, 1.94, 0.78]
  7793. not_integers = [1.5, [1, 2, 3.5], math.nan, math.inf]
  7794. def test_dtype_iv(self, xp):
  7795. message = '`sample` must be an array of real numbers.'
  7796. with pytest.raises(ValueError, match=message):
  7797. stats.lmoment(xp.asarray(self.data, dtype=xp.complex128))
  7798. @skip_xp_invalid_arg
  7799. def test_dtype_iv_non_numeric(self):
  7800. message = '`sample` must be an array of real numbers.'
  7801. with pytest.raises(ValueError, match=message):
  7802. stats.lmoment(np.array(self.data, dtype=object))
  7803. @pytest.mark.parametrize('order', not_integers + [0, -1, [], [[1, 2, 3]]])
  7804. def test_order_iv(self, order, xp):
  7805. message = '`order` must be a scalar or a non-empty...'
  7806. with pytest.raises(ValueError, match=message):
  7807. stats.lmoment(xp.asarray(self.data), order=order)
  7808. @pytest.mark.parametrize('axis', not_integers)
  7809. def test_axis_iv(self, axis, xp):
  7810. message = '`axis` must be an integer'
  7811. with pytest.raises(ValueError, match=message):
  7812. stats.lmoment(xp.asarray(self.data), axis=axis)
  7813. @pytest.mark.parametrize('sorted', not_integers)
  7814. def test_sorted_iv(self, sorted, xp):
  7815. message = '`sorted` must be True or False.'
  7816. with pytest.raises(ValueError, match=message):
  7817. stats.lmoment(xp.asarray(self.data), sorted=sorted)
  7818. @pytest.mark.parametrize('standardize', not_integers)
  7819. def test_standardize_iv(self, standardize, xp):
  7820. message = '`standardize` must be True or False.'
  7821. with pytest.raises(ValueError, match=message):
  7822. stats.lmoment(xp.asarray(self.data), standardize=standardize)
  7823. @pytest.mark.parametrize('order', [1, 4, [1, 2, 3, 4]])
  7824. @pytest.mark.parametrize('standardize', [False, True])
  7825. @pytest.mark.parametrize('presorted', [False, True])
  7826. def test_lmoment(self, order, standardize, presorted, xp):
  7827. # Reference values from R package `lmom`
  7828. # options(digits=16)
  7829. # library(lmom)
  7830. # data= c(0.87, 0.87,..., 1.94, 0.78)
  7831. # samlmu(data)
  7832. ref = xp.asarray([1.4087603305785130, 0.3415936639118458,
  7833. 0.2189964482831403, 0.1328186463415905])
  7834. if not standardize:
  7835. ref = xpx.at(ref)[2:].multiply(ref[1])
  7836. data = sorted(self.data) if presorted else self.data
  7837. data = xp.asarray(data)
  7838. res = stats.lmoment(data, order, standardize=standardize, sorted=presorted)
  7839. xp_assert_close(res, ref[xp.asarray(order)-1])
  7840. def test_dtype(self, xp):
  7841. dtype = xp.float32
  7842. sample = xp.asarray(self.data)
  7843. res = stats.lmoment(xp.astype(sample, dtype))
  7844. ref = xp.astype(stats.lmoment(sample), dtype)
  7845. xp_assert_close(res, ref, rtol=1e-4)
  7846. dtype = xp.int64
  7847. sample = xp.asarray([1, 2, 3, 4, 5])
  7848. res = stats.lmoment(xp.astype(sample, dtype))
  7849. ref = stats.lmoment(xp.astype(sample, xp_default_dtype(xp)))
  7850. xp_assert_close(res, ref)
  7851. @pytest.mark.parametrize("axis", [0, 1])
  7852. def test_axis(self, axis, xp):
  7853. # nd input is tested extensively in `test_axis_nan_policy`, but only for NumPy
  7854. rng = np.random.default_rng(234923498149931248151)
  7855. x = rng.random(size=(10, 11))
  7856. res = stats.lmoment(xp.asarray(x), axis=axis)
  7857. ref = xp.asarray(stats.lmoment(x, axis=axis))
  7858. xp_assert_close(res, ref)
  7859. class TestXP_Mean:
  7860. @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)])
  7861. @pytest.mark.parametrize('weights', [None, True])
  7862. @pytest.mark.parametrize('keepdims', [False, True])
  7863. def test_xp_mean_basic(self, xp, axis, weights, keepdims):
  7864. rng = np.random.default_rng(90359458245906)
  7865. x = rng.random((3, 4, 5))
  7866. x_xp = xp.asarray(x)
  7867. w = w_xp = None
  7868. if weights:
  7869. w = rng.random((1, 5))
  7870. w_xp = xp.asarray(w)
  7871. x, w = np.broadcast_arrays(x, w)
  7872. res = _xp_mean(x_xp, weights=w_xp, axis=axis, keepdims=keepdims)
  7873. ref = np.average(x, weights=w, axis=axis, keepdims=keepdims)
  7874. xp_assert_close(res, xp.asarray(ref))
  7875. def test_non_broadcastable(self, xp):
  7876. # non-broadcastable x and weights
  7877. x, w = xp.arange(10.), xp.zeros(5)
  7878. message = "Array shapes are incompatible for broadcasting."
  7879. with pytest.raises(ValueError, match=message):
  7880. _xp_mean(x, weights=w)
  7881. @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
  7882. @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
  7883. def test_special_cases(self, xp):
  7884. # weights sum to zero
  7885. weights = xp.asarray([-1., 0., 1.])
  7886. res = _xp_mean(xp.asarray([1., 1., 1.]), weights=weights)
  7887. xp_assert_close(res, xp.asarray(xp.nan))
  7888. res = _xp_mean(xp.asarray([2., 1., 1.]), weights=weights)
  7889. xp_assert_close(res, xp.asarray(-np.inf))
  7890. res = _xp_mean(xp.asarray([1., 1., 2.]), weights=weights)
  7891. xp_assert_close(res, xp.asarray(np.inf))
  7892. @pytest.mark.filterwarnings(
  7893. "ignore:invalid value encountered:RuntimeWarning"
  7894. ) # for dask
  7895. def test_nan_policy(self, xp):
  7896. x = xp.arange(10.)
  7897. mask = (x == 3)
  7898. x = xp.where(mask, xp.nan, x)
  7899. # nan_policy='raise' raises an error
  7900. if is_lazy_array(x):
  7901. with pytest.raises(TypeError, match='not supported for lazy arrays'):
  7902. _xp_mean(x, nan_policy='raise')
  7903. else:
  7904. with pytest.raises(ValueError, match='The input contains nan values'):
  7905. _xp_mean(x, nan_policy='raise')
  7906. # `nan_policy='propagate'` is the default, and the result is NaN
  7907. res1 = _xp_mean(x)
  7908. res2 = _xp_mean(x, nan_policy='propagate')
  7909. ref = xp.asarray(xp.nan)
  7910. xp_assert_equal(res1, ref)
  7911. xp_assert_equal(res2, ref)
  7912. # `nan_policy='omit'` omits NaNs in `x`
  7913. res = _xp_mean(x, nan_policy='omit')
  7914. ref = xp.mean(x[~mask])
  7915. xp_assert_close(res, ref)
  7916. # `nan_policy='omit'` omits NaNs in `weights`, too
  7917. weights = xp.ones(10)
  7918. weights = xp.where(mask, xp.nan, weights)
  7919. res = _xp_mean(xp.arange(10.), weights=weights, nan_policy='omit')
  7920. ref = xp.mean(x[~mask])
  7921. xp_assert_close(res, ref)
  7922. @skip_xp_backends(eager_only=True)
  7923. def test_nan_policy_warns(self, xp):
  7924. x = xp.arange(10.)
  7925. x = xp.where(x == 3, xp.nan, x)
  7926. # Check for warning if omitting NaNs causes empty slice
  7927. message = 'After omitting NaNs...'
  7928. with pytest.warns(RuntimeWarning, match=message):
  7929. res = _xp_mean(x * np.nan, nan_policy='omit')
  7930. ref = xp.asarray(xp.nan)
  7931. xp_assert_equal(res, ref)
  7932. def test_empty(self, xp):
  7933. message = 'One or more sample arguments is too small...'
  7934. with pytest.warns(SmallSampleWarning, match=message):
  7935. res = _xp_mean(xp.asarray([]))
  7936. ref = xp.asarray(xp.nan)
  7937. xp_assert_equal(res, ref)
  7938. message = "All axis-slices of one or more sample arguments..."
  7939. with pytest.warns(SmallSampleWarning, match=message):
  7940. res = _xp_mean(xp.asarray([[]]), axis=1)
  7941. ref = xp.asarray([xp.nan])
  7942. xp_assert_equal(res, ref)
  7943. res = _xp_mean(xp.asarray([[]]), axis=0)
  7944. ref = xp.asarray([])
  7945. xp_assert_equal(res, ref)
  7946. @pytest.mark.filterwarnings(
  7947. "ignore:overflow encountered in reduce:RuntimeWarning"
  7948. ) # for dask
  7949. def test_dtype(self, xp):
  7950. max = xp.finfo(xp.float32).max
  7951. x_np = np.asarray([max, max], dtype=np.float32)
  7952. x_xp = xp.asarray(x_np)
  7953. # Overflow occurs for float32 input
  7954. with np.errstate(over='ignore'):
  7955. res = _xp_mean(x_xp)
  7956. ref = np.mean(x_np)
  7957. np.testing.assert_equal(ref, np.inf)
  7958. xp_assert_close(res, xp.asarray(ref))
  7959. # correct result is returned if `float64` is used
  7960. res = _xp_mean(x_xp, dtype=xp.float64)
  7961. ref = xp.asarray(np.mean(np.asarray(x_np, dtype=np.float64)))
  7962. xp_assert_close(res, ref)
  7963. def test_integer(self, xp):
  7964. # integer inputs are converted to the appropriate float
  7965. x = xp.arange(10)
  7966. y = xp.arange(10.)
  7967. xp_assert_equal(_xp_mean(x), _xp_mean(y))
  7968. xp_assert_equal(_xp_mean(y, weights=x), _xp_mean(y, weights=y))
  7969. def test_complex_gh22404(self, xp):
  7970. rng = np.random.default_rng(90359458245906)
  7971. x, y, wx, wy = rng.random((4, 20))
  7972. res = _xp_mean(xp.asarray(x + y*1j), weights=xp.asarray(wx + wy*1j))
  7973. ref = np.average(x + y*1j, weights=wx + wy*1j)
  7974. xp_assert_close(res, xp.asarray(ref))
  7975. class TestXP_Var:
  7976. @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)])
  7977. @pytest.mark.parametrize('keepdims', [False, True])
  7978. @pytest.mark.parametrize('correction', [0, 1])
  7979. @pytest.mark.parametrize('nan_policy', ['propagate', 'omit'])
  7980. def test_xp_var_basic(self, xp, axis, keepdims, correction, nan_policy):
  7981. rng = np.random.default_rng(90359458245906)
  7982. x = rng.random((3, 4, 5))
  7983. var_ref = np.var
  7984. if nan_policy == 'omit':
  7985. nan_mask = rng.random(size=x.shape) > 0.5
  7986. x[nan_mask] = np.nan
  7987. var_ref = np.nanvar
  7988. x_xp = xp.asarray(x)
  7989. res = _xp_var(x_xp, axis=axis, keepdims=keepdims, correction=correction,
  7990. nan_policy=nan_policy)
  7991. with warnings.catch_warnings():
  7992. warnings.filterwarnings(
  7993. "ignore", "Degrees of freedom <= 0 for slice", RuntimeWarning)
  7994. ref = var_ref(x, axis=axis, keepdims=keepdims, ddof=correction)
  7995. xp_assert_close(res, xp.asarray(ref))
  7996. def test_special_cases(self, xp):
  7997. # correction too big
  7998. res = _xp_var(xp.asarray([1., 2.]), correction=3)
  7999. xp_assert_close(res, xp.asarray(xp.nan))
  8000. def test_nan_policy(self, xp):
  8001. x = xp.arange(10.)
  8002. mask = (x == 3)
  8003. x = xp.where(mask, xp.nan, x)
  8004. # `nan_policy='propagate'` is the default, and the result is NaN
  8005. res1 = _xp_var(x)
  8006. res2 = _xp_var(x, nan_policy='propagate')
  8007. ref = xp.asarray(xp.nan)
  8008. xp_assert_equal(res1, ref)
  8009. xp_assert_equal(res2, ref)
  8010. # `nan_policy='omit'` omits NaNs in `x`
  8011. res = _xp_var(x, nan_policy='omit')
  8012. ref = xp.var(x[~mask])
  8013. xp_assert_close(res, ref)
  8014. @skip_xp_backends(eager_only=True)
  8015. def test_nan_policy_warns(self, xp):
  8016. x = xp.arange(10.)
  8017. x = xp.where(x == 3, xp.nan, x)
  8018. # Check for warning if omitting NaNs causes empty slice
  8019. message = 'After omitting NaNs...'
  8020. with pytest.warns(RuntimeWarning, match=message):
  8021. res = _xp_var(x * np.nan, nan_policy='omit')
  8022. ref = xp.asarray(xp.nan)
  8023. xp_assert_equal(res, ref)
  8024. @skip_xp_backends(eager_only=True)
  8025. def test_nan_policy_raise(self, xp):
  8026. # nan_policy='raise' raises an error when NaNs are present
  8027. message = 'The input contains nan values'
  8028. with pytest.raises(ValueError, match=message):
  8029. _xp_var(xp.asarray([1, 2, xp.nan]), nan_policy='raise')
  8030. def test_empty(self, xp):
  8031. message = 'One or more sample arguments is too small...'
  8032. with pytest.warns(SmallSampleWarning, match=message):
  8033. res = _xp_var(xp.asarray([]))
  8034. ref = xp.asarray(xp.nan)
  8035. xp_assert_equal(res, ref)
  8036. message = "All axis-slices of one or more sample arguments..."
  8037. with pytest.warns(SmallSampleWarning, match=message):
  8038. res = _xp_var(xp.asarray([[]]), axis=1)
  8039. ref = xp.asarray([xp.nan])
  8040. xp_assert_equal(res, ref)
  8041. res = _xp_var(xp.asarray([[]]), axis=0)
  8042. ref = xp.asarray([])
  8043. xp_assert_equal(res, ref)
  8044. @pytest.mark.filterwarnings(
  8045. "ignore:overflow encountered in reduce:RuntimeWarning"
  8046. ) # Overflow occurs for float32 input
  8047. def test_dtype(self, xp):
  8048. max = xp.finfo(xp.float32).max
  8049. x_np = np.asarray([max, max/2], dtype=np.float32)
  8050. x_xp = xp.asarray(x_np)
  8051. res = _xp_var(x_xp)
  8052. ref = np.var(x_np)
  8053. np.testing.assert_equal(ref, np.inf)
  8054. xp_assert_close(res, xp.asarray(ref))
  8055. # correct result is returned if `float64` is used
  8056. res = _xp_var(x_xp, dtype=xp.float64)
  8057. ref = xp.asarray(np.var(np.asarray(x_np, dtype=np.float64)))
  8058. xp_assert_close(res, ref)
  8059. def test_integer(self, xp):
  8060. # integer inputs are converted to the appropriate float
  8061. x = xp.arange(10)
  8062. y = xp.arange(10.)
  8063. xp_assert_equal(_xp_var(x), _xp_var(y))
  8064. def test_complex_gh22404(self, xp):
  8065. rng = np.random.default_rng(90359458245906)
  8066. x, y = rng.random((2, 20))
  8067. res = _xp_var(xp.asarray(x + y*1j))
  8068. ref = np.var(x + y*1j)
  8069. xp_assert_close(res, xp.asarray(ref), check_dtype=False)
  8070. def test_chk_asarray(xp):
  8071. rng = np.random.default_rng(2348923425434)
  8072. x0 = rng.random(size=(2, 3, 4))
  8073. x = xp.asarray(x0)
  8074. axis = 1
  8075. x_out, axis_out = _chk_asarray(x, axis=axis, xp=xp)
  8076. xp_assert_equal(x_out, xp.asarray(x0))
  8077. assert_equal(axis_out, axis)
  8078. axis = None
  8079. x_out, axis_out = _chk_asarray(x, axis=axis, xp=xp)
  8080. xp_assert_equal(x_out, xp.asarray(x0.ravel()))
  8081. assert_equal(axis_out, 0)
  8082. axis = 2
  8083. x_out, axis_out = _chk_asarray(x[0, 0, 0], axis=axis, xp=xp)
  8084. xp_assert_equal(x_out, xp.asarray(np.atleast_1d(x0[0, 0, 0])))
  8085. assert_equal(axis_out, axis)